1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Indexing facilities."""
23
'GraphIndexPrefixAdapter',
27
from bisect import bisect_right
28
from cStringIO import StringIO
31
from bzrlib.lazy_import import lazy_import
32
lazy_import(globals(), """
33
from bzrlib import trace
34
from bzrlib.bisect_multi import bisect_multi_bytes
35
from bzrlib.revision import NULL_REVISION
36
from bzrlib.trace import mutter
44
_HEADER_READV = (0, 200)
45
_OPTION_KEY_ELEMENTS = "key_elements="
47
_OPTION_NODE_REFS = "node_ref_lists="
48
_SIGNATURE = "Bazaar Graph Index 1\n"
51
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
52
_newline_null_re = re.compile('[\n\0]')
55
class GraphIndexBuilder(object):
56
"""A builder that can build a GraphIndex.
58
The resulting graph has the structure:
60
_SIGNATURE OPTIONS NODES NEWLINE
61
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
62
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
64
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
65
KEY := Not-whitespace-utf8
67
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
68
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
69
REFERENCE := DIGITS ; digits is the byte offset in the index of the
71
VALUE := no-newline-no-null-bytes
74
def __init__(self, reference_lists=0, key_elements=1):
75
"""Create a GraphIndex builder.
77
:param reference_lists: The number of node references lists for each
79
:param key_elements: The number of bytestrings in each key.
81
self.reference_lists = reference_lists
83
# A dict of {key: (absent, ref_lists, value)}
85
self._nodes_by_key = None
86
self._key_length = key_elements
88
def _check_key(self, key):
89
"""Raise BadIndexKey if key is not a valid key for this index."""
90
if type(key) != tuple:
91
raise errors.BadIndexKey(key)
92
if self._key_length != len(key):
93
raise errors.BadIndexKey(key)
95
if not element or _whitespace_re.search(element) is not None:
96
raise errors.BadIndexKey(element)
98
def _get_nodes_by_key(self):
99
if self._nodes_by_key is None:
101
if self.reference_lists:
102
for key, (absent, references, value) in self._nodes.iteritems():
105
key_dict = nodes_by_key
106
for subkey in key[:-1]:
107
key_dict = key_dict.setdefault(subkey, {})
108
key_dict[key[-1]] = key, value, references
110
for key, (absent, references, value) in self._nodes.iteritems():
113
key_dict = nodes_by_key
114
for subkey in key[:-1]:
115
key_dict = key_dict.setdefault(subkey, {})
116
key_dict[key[-1]] = key, value
117
self._nodes_by_key = nodes_by_key
118
return self._nodes_by_key
120
def _update_nodes_by_key(self, key, value, node_refs):
121
"""Update the _nodes_by_key dict with a new key.
123
For a key of (foo, bar, baz) create
124
_nodes_by_key[foo][bar][baz] = key_value
126
if self._nodes_by_key is None:
128
key_dict = self._nodes_by_key
129
if self.reference_lists:
130
key_value = key, value, node_refs
132
key_value = key, value
133
for subkey in key[:-1]:
134
key_dict = key_dict.setdefault(subkey, {})
135
key_dict[key[-1]] = key_value
137
def _check_key_ref_value(self, key, references, value):
138
"""Check that 'key' and 'references' are all valid.
140
:param key: A key tuple. Must conform to the key interface (be a tuple,
141
be of the right length, not have any whitespace or nulls in any key
143
:param references: An iterable of reference lists. Something like
144
[[(ref, key)], [(ref, key), (other, key)]]
145
:param value: The value associate with this key. Must not contain
146
newlines or null characters.
147
:return: (node_refs, absent_references)
148
node_refs basically a packed form of 'references' where all
150
absent_references reference keys that are not in self._nodes.
151
This may contain duplicates if the same key is
152
referenced in multiple lists.
155
if _newline_null_re.search(value) is not None:
156
raise errors.BadIndexValue(value)
157
if len(references) != self.reference_lists:
158
raise errors.BadIndexValue(references)
160
absent_references = []
161
for reference_list in references:
162
for reference in reference_list:
163
# If reference *is* in self._nodes, then we know it has already
165
if reference not in self._nodes:
166
self._check_key(reference)
167
absent_references.append(reference)
168
node_refs.append(tuple(reference_list))
169
return tuple(node_refs), absent_references
171
def add_node(self, key, value, references=()):
172
"""Add a node to the index.
174
:param key: The key. keys are non-empty tuples containing
175
as many whitespace-free utf8 bytestrings as the key length
176
defined for this index.
177
:param references: An iterable of iterables of keys. Each is a
178
reference to another key.
179
:param value: The value to associate with the key. It may be any
180
bytes as long as it does not contain \0 or \n.
183
absent_references) = self._check_key_ref_value(key, references, value)
184
if key in self._nodes and self._nodes[key][0] != 'a':
185
raise errors.BadIndexDuplicateKey(key, self)
186
for reference in absent_references:
187
# There may be duplicates, but I don't think it is worth worrying
189
self._nodes[reference] = ('a', (), '')
190
self._nodes[key] = ('', node_refs, value)
192
if self._nodes_by_key is not None and self._key_length > 1:
193
self._update_nodes_by_key(key, value, node_refs)
197
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
198
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
199
lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
200
prefix_length = sum(len(x) for x in lines)
201
# references are byte offsets. To avoid having to do nasty
202
# polynomial work to resolve offsets (references to later in the
203
# file cannot be determined until all the inbetween references have
204
# been calculated too) we pad the offsets with 0's to make them be
205
# of consistent length. Using binary offsets would break the trivial
207
# to calculate the width of zero's needed we do three passes:
208
# one to gather all the non-reference data and the number of references.
209
# one to pad all the data with reference-length and determine entry
213
# forward sorted by key. In future we may consider topological sorting,
214
# at the cost of table scans for direct lookup, or a second index for
216
nodes = sorted(self._nodes.items())
217
# if we do not prepass, we don't know how long it will be up front.
218
expected_bytes = None
219
# we only need to pre-pass if we have reference lists at all.
220
if self.reference_lists:
222
non_ref_bytes = prefix_length
224
# TODO use simple multiplication for the constants in this loop.
225
for key, (absent, references, value) in nodes:
226
# record the offset known *so far* for this key:
227
# the non reference bytes to date, and the total references to
228
# date - saves reaccumulating on the second pass
229
key_offset_info.append((key, non_ref_bytes, total_references))
230
# key is literal, value is literal, there are 3 null's, 1 NL
231
# key is variable length tuple, \x00 between elements
232
non_ref_bytes += sum(len(element) for element in key)
233
if self._key_length > 1:
234
non_ref_bytes += self._key_length - 1
235
# value is literal bytes, there are 3 null's, 1 NL.
236
non_ref_bytes += len(value) + 3 + 1
237
# one byte for absent if set.
240
elif self.reference_lists:
241
# (ref_lists -1) tabs
242
non_ref_bytes += self.reference_lists - 1
243
# (ref-1 cr's per ref_list)
244
for ref_list in references:
245
# how many references across the whole file?
246
total_references += len(ref_list)
247
# accrue reference separators
249
non_ref_bytes += len(ref_list) - 1
250
# how many digits are needed to represent the total byte count?
252
possible_total_bytes = non_ref_bytes + total_references*digits
253
while 10 ** digits < possible_total_bytes:
255
possible_total_bytes = non_ref_bytes + total_references*digits
256
expected_bytes = possible_total_bytes + 1 # terminating newline
257
# resolve key addresses.
259
for key, non_ref_bytes, total_references in key_offset_info:
260
key_addresses[key] = non_ref_bytes + total_references*digits
262
format_string = '%%0%sd' % digits
263
for key, (absent, references, value) in nodes:
264
flattened_references = []
265
for ref_list in references:
267
for reference in ref_list:
268
ref_addresses.append(format_string % key_addresses[reference])
269
flattened_references.append('\r'.join(ref_addresses))
270
string_key = '\x00'.join(key)
271
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
272
'\t'.join(flattened_references), value))
274
result = StringIO(''.join(lines))
275
if expected_bytes and len(result.getvalue()) != expected_bytes:
276
raise errors.BzrError('Failed index creation. Internal error:'
277
' mismatched output length and expected length: %d %d' %
278
(len(result.getvalue()), expected_bytes))
282
class GraphIndex(object):
283
"""An index for data with embedded graphs.
285
The index maps keys to a list of key reference lists, and a value.
286
Each node has the same number of key reference lists. Each key reference
287
list can be empty or an arbitrary length. The value is an opaque NULL
288
terminated string without any newlines. The storage of the index is
289
hidden in the interface: keys and key references are always tuples of
290
bytestrings, never the internal representation (e.g. dictionary offsets).
292
It is presumed that the index will not be mutated - it is static data.
294
Successive iter_all_entries calls will read the entire index each time.
295
Additionally, iter_entries calls will read the index linearly until the
296
desired keys are found. XXX: This must be fixed before the index is
297
suitable for production use. :XXX
300
def __init__(self, transport, name, size):
301
"""Open an index called name on transport.
303
:param transport: A bzrlib.transport.Transport.
304
:param name: A path to provide to transport API calls.
305
:param size: The size of the index in bytes. This is used for bisection
306
logic to perform partial index reads. While the size could be
307
obtained by statting the file this introduced an additional round
308
trip as well as requiring stat'able transports, both of which are
309
avoided by having it supplied. If size is None, then bisection
310
support will be disabled and accessing the index will just stream
313
self._transport = transport
315
# Becomes a dict of key:(value, reference-list-byte-locations) used by
316
# the bisection interface to store parsed but not resolved keys.
317
self._bisect_nodes = None
318
# Becomes a dict of key:(value, reference-list-keys) which are ready to
319
# be returned directly to callers.
321
# a sorted list of slice-addresses for the parsed bytes of the file.
322
# e.g. (0,1) would mean that byte 0 is parsed.
323
self._parsed_byte_map = []
324
# a sorted list of keys matching each slice address for parsed bytes
325
# e.g. (None, 'foo@bar') would mean that the first byte contained no
326
# key, and the end byte of the slice is the of the data for 'foo@bar'
327
self._parsed_key_map = []
328
self._key_count = None
329
self._keys_by_offset = None
330
self._nodes_by_key = None
332
# The number of bytes we've read so far in trying to process this file
335
def __eq__(self, other):
336
"""Equal when self and other were created with the same parameters."""
338
type(self) == type(other) and
339
self._transport == other._transport and
340
self._name == other._name and
341
self._size == other._size)
343
def __ne__(self, other):
344
return not self.__eq__(other)
347
return "%s(%r)" % (self.__class__.__name__,
348
self._transport.abspath(self._name))
350
def _buffer_all(self, stream=None):
351
"""Buffer all the index data.
353
Mutates self._nodes and self.keys_by_offset.
355
if self._nodes is not None:
356
# We already did this
358
if 'index' in debug.debug_flags:
359
mutter('Reading entire index %s', self._transport.abspath(self._name))
361
stream = self._transport.get(self._name)
362
self._read_prefix(stream)
363
self._expected_elements = 3 + self._key_length
365
# raw data keyed by offset
366
self._keys_by_offset = {}
367
# ready-to-return key:value or key:value, node_ref_lists
369
self._nodes_by_key = None
372
lines = stream.read().split('\n')
374
_, _, _, trailers = self._parse_lines(lines, pos)
375
for key, absent, references, value in self._keys_by_offset.itervalues():
378
# resolve references:
379
if self.node_ref_lists:
380
node_value = (value, self._resolve_references(references))
383
self._nodes[key] = node_value
384
# cache the keys for quick set intersections
385
self._keys = set(self._nodes)
387
# there must be one line - the empty trailer line.
388
raise errors.BadIndexData(self)
390
def _get_nodes_by_key(self):
391
if self._nodes_by_key is None:
393
if self.node_ref_lists:
394
for key, (value, references) in self._nodes.iteritems():
395
key_dict = nodes_by_key
396
for subkey in key[:-1]:
397
key_dict = key_dict.setdefault(subkey, {})
398
key_dict[key[-1]] = key, value, references
400
for key, value in self._nodes.iteritems():
401
key_dict = nodes_by_key
402
for subkey in key[:-1]:
403
key_dict = key_dict.setdefault(subkey, {})
404
key_dict[key[-1]] = key, value
405
self._nodes_by_key = nodes_by_key
406
return self._nodes_by_key
408
def iter_all_entries(self):
409
"""Iterate over all keys within the index.
411
:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
412
The former tuple is used when there are no reference lists in the
413
index, making the API compatible with simple key:value index types.
414
There is no defined order for the result iteration - it will be in
415
the most efficient order for the index.
417
if 'evil' in debug.debug_flags:
418
trace.mutter_callsite(3,
419
"iter_all_entries scales with size of history.")
420
if self._nodes is None:
422
if self.node_ref_lists:
423
for key, (value, node_ref_lists) in self._nodes.iteritems():
424
yield self, key, value, node_ref_lists
426
for key, value in self._nodes.iteritems():
427
yield self, key, value
429
def _read_prefix(self, stream):
430
signature = stream.read(len(self._signature()))
431
if not signature == self._signature():
432
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
433
options_line = stream.readline()
434
if not options_line.startswith(_OPTION_NODE_REFS):
435
raise errors.BadIndexOptions(self)
437
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
439
raise errors.BadIndexOptions(self)
440
options_line = stream.readline()
441
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
442
raise errors.BadIndexOptions(self)
444
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
446
raise errors.BadIndexOptions(self)
447
options_line = stream.readline()
448
if not options_line.startswith(_OPTION_LEN):
449
raise errors.BadIndexOptions(self)
451
self._key_count = int(options_line[len(_OPTION_LEN):-1])
453
raise errors.BadIndexOptions(self)
455
def _resolve_references(self, references):
456
"""Return the resolved key references for references.
458
References are resolved by looking up the location of the key in the
459
_keys_by_offset map and substituting the key name, preserving ordering.
461
:param references: An iterable of iterables of key locations. e.g.
463
:return: A tuple of tuples of keys.
466
for ref_list in references:
467
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
468
return tuple(node_refs)
470
def _find_index(self, range_map, key):
471
"""Helper for the _parsed_*_index calls.
473
Given a range map - [(start, end), ...], finds the index of the range
474
in the map for key if it is in the map, and if it is not there, the
475
immediately preceeding range in the map.
477
result = bisect_right(range_map, key) - 1
478
if result + 1 < len(range_map):
479
# check the border condition, it may be in result + 1
480
if range_map[result + 1][0] == key[0]:
484
def _parsed_byte_index(self, offset):
485
"""Return the index of the entry immediately before offset.
487
e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
488
there is one unparsed byte (the 11th, addressed as[10]). then:
489
asking for 0 will return 0
490
asking for 10 will return 0
491
asking for 11 will return 1
492
asking for 12 will return 1
495
return self._find_index(self._parsed_byte_map, key)
497
def _parsed_key_index(self, key):
498
"""Return the index of the entry immediately before key.
500
e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
501
meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
502
have been parsed, then:
503
asking for '' will return 0
504
asking for 'a' will return 0
505
asking for 'b' will return 1
506
asking for 'e' will return 1
508
search_key = (key, None)
509
return self._find_index(self._parsed_key_map, search_key)
511
def _is_parsed(self, offset):
512
"""Returns True if offset has been parsed."""
513
index = self._parsed_byte_index(offset)
514
if index == len(self._parsed_byte_map):
515
return offset < self._parsed_byte_map[index - 1][1]
516
start, end = self._parsed_byte_map[index]
517
return offset >= start and offset < end
519
def _iter_entries_from_total_buffer(self, keys):
520
"""Iterate over keys when the entire index is parsed."""
521
keys = keys.intersection(self._keys)
522
if self.node_ref_lists:
524
value, node_refs = self._nodes[key]
525
yield self, key, value, node_refs
528
yield self, key, self._nodes[key]
530
def iter_entries(self, keys):
531
"""Iterate over keys within the index.
533
:param keys: An iterable providing the keys to be retrieved.
534
:return: An iterable as per iter_all_entries, but restricted to the
535
keys supplied. No additional keys will be returned, and every
536
key supplied that is in the index will be returned.
541
if self._size is None and self._nodes is None:
544
# We fit about 20 keys per minimum-read (4K), so if we are looking for
545
# more than 1/20th of the index its likely (assuming homogenous key
546
# spread) that we'll read the entire index. If we're going to do that,
547
# buffer the whole thing. A better analysis might take key spread into
548
# account - but B+Tree indices are better anyway.
549
# We could look at all data read, and use a threshold there, which will
550
# trigger on ancestry walks, but that is not yet fully mapped out.
551
if self._nodes is None and len(keys) * 20 > self.key_count():
553
if self._nodes is not None:
554
return self._iter_entries_from_total_buffer(keys)
556
return (result[1] for result in bisect_multi_bytes(
557
self._lookup_keys_via_location, self._size, keys))
559
def iter_entries_prefix(self, keys):
560
"""Iterate over keys within the index using prefix matching.
562
Prefix matching is applied within the tuple of a key, not to within
563
the bytestring of each key element. e.g. if you have the keys ('foo',
564
'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
565
only the former key is returned.
567
WARNING: Note that this method currently causes a full index parse
568
unconditionally (which is reasonably appropriate as it is a means for
569
thunking many small indices into one larger one and still supplies
570
iter_all_entries at the thunk layer).
572
:param keys: An iterable providing the key prefixes to be retrieved.
573
Each key prefix takes the form of a tuple the length of a key, but
574
with the last N elements 'None' rather than a regular bytestring.
575
The first element cannot be 'None'.
576
:return: An iterable as per iter_all_entries, but restricted to the
577
keys with a matching prefix to those supplied. No additional keys
578
will be returned, and every match that is in the index will be
584
# load data - also finds key lengths
585
if self._nodes is None:
587
if self._key_length == 1:
591
raise errors.BadIndexKey(key)
592
if len(key) != self._key_length:
593
raise errors.BadIndexKey(key)
594
if self.node_ref_lists:
595
value, node_refs = self._nodes[key]
596
yield self, key, value, node_refs
598
yield self, key, self._nodes[key]
600
nodes_by_key = self._get_nodes_by_key()
604
raise errors.BadIndexKey(key)
605
if len(key) != self._key_length:
606
raise errors.BadIndexKey(key)
607
# find what it refers to:
608
key_dict = nodes_by_key
610
# find the subdict whose contents should be returned.
612
while len(elements) and elements[0] is not None:
613
key_dict = key_dict[elements[0]]
616
# a non-existant lookup.
621
key_dict = dicts.pop(-1)
622
# can't be empty or would not exist
623
item, value = key_dict.iteritems().next()
624
if type(value) == dict:
626
dicts.extend(key_dict.itervalues())
629
for value in key_dict.itervalues():
630
# each value is the key:value:node refs tuple
632
yield (self, ) + value
634
# the last thing looked up was a terminal element
635
yield (self, ) + key_dict
638
"""Return an estimate of the number of keys in this index.
640
For GraphIndex the estimate is exact.
642
if self._key_count is None:
643
self._read_and_parse([_HEADER_READV])
644
return self._key_count
646
def _lookup_keys_via_location(self, location_keys):
647
"""Public interface for implementing bisection.
649
If _buffer_all has been called, then all the data for the index is in
650
memory, and this method should not be called, as it uses a separate
651
cache because it cannot pre-resolve all indices, which buffer_all does
654
:param location_keys: A list of location(byte offset), key tuples.
655
:return: A list of (location_key, result) tuples as expected by
656
bzrlib.bisect_multi.bisect_multi_bytes.
658
# Possible improvements:
659
# - only bisect lookup each key once
660
# - sort the keys first, and use that to reduce the bisection window
662
# this progresses in three parts:
665
# attempt to answer the question from the now in memory data.
666
# build the readv request
667
# for each location, ask for 800 bytes - much more than rows we've seen
670
for location, key in location_keys:
671
# can we answer from cache?
672
if self._bisect_nodes and key in self._bisect_nodes:
673
# We have the key parsed.
675
index = self._parsed_key_index(key)
676
if (len(self._parsed_key_map) and
677
self._parsed_key_map[index][0] <= key and
678
(self._parsed_key_map[index][1] >= key or
679
# end of the file has been parsed
680
self._parsed_byte_map[index][1] == self._size)):
681
# the key has been parsed, so no lookup is needed even if its
684
# - if we have examined this part of the file already - yes
685
index = self._parsed_byte_index(location)
686
if (len(self._parsed_byte_map) and
687
self._parsed_byte_map[index][0] <= location and
688
self._parsed_byte_map[index][1] > location):
689
# the byte region has been parsed, so no read is needed.
692
if location + length > self._size:
693
length = self._size - location
694
# todo, trim out parsed locations.
696
readv_ranges.append((location, length))
697
# read the header if needed
698
if self._bisect_nodes is None:
699
readv_ranges.append(_HEADER_READV)
700
self._read_and_parse(readv_ranges)
702
if self._nodes is not None:
703
# _read_and_parse triggered a _buffer_all because we requested the
705
for location, key in location_keys:
706
if key not in self._nodes: # not present
707
result.append(((location, key), False))
708
elif self.node_ref_lists:
709
value, refs = self._nodes[key]
710
result.append(((location, key),
711
(self, key, value, refs)))
713
result.append(((location, key),
714
(self, key, self._nodes[key])))
717
# - figure out <, >, missing, present
718
# - result present references so we can return them.
719
# keys that we cannot answer until we resolve references
720
pending_references = []
721
pending_locations = set()
722
for location, key in location_keys:
723
# can we answer from cache?
724
if key in self._bisect_nodes:
725
# the key has been parsed, so no lookup is needed
726
if self.node_ref_lists:
727
# the references may not have been all parsed.
728
value, refs = self._bisect_nodes[key]
729
wanted_locations = []
730
for ref_list in refs:
732
if ref not in self._keys_by_offset:
733
wanted_locations.append(ref)
735
pending_locations.update(wanted_locations)
736
pending_references.append((location, key))
738
result.append(((location, key), (self, key,
739
value, self._resolve_references(refs))))
741
result.append(((location, key),
742
(self, key, self._bisect_nodes[key])))
745
# has the region the key should be in, been parsed?
746
index = self._parsed_key_index(key)
747
if (self._parsed_key_map[index][0] <= key and
748
(self._parsed_key_map[index][1] >= key or
749
# end of the file has been parsed
750
self._parsed_byte_map[index][1] == self._size)):
751
result.append(((location, key), False))
753
# no, is the key above or below the probed location:
754
# get the range of the probed & parsed location
755
index = self._parsed_byte_index(location)
756
# if the key is below the start of the range, its below
757
if key < self._parsed_key_map[index][0]:
761
result.append(((location, key), direction))
763
# lookup data to resolve references
764
for location in pending_locations:
766
if location + length > self._size:
767
length = self._size - location
768
# TODO: trim out parsed locations (e.g. if the 800 is into the
769
# parsed region trim it, and dont use the adjust_for_latency
772
readv_ranges.append((location, length))
773
self._read_and_parse(readv_ranges)
774
if self._nodes is not None:
775
# The _read_and_parse triggered a _buffer_all, grab the data and
777
for location, key in pending_references:
778
value, refs = self._nodes[key]
779
result.append(((location, key), (self, key, value, refs)))
781
for location, key in pending_references:
782
# answer key references we had to look-up-late.
783
value, refs = self._bisect_nodes[key]
784
result.append(((location, key), (self, key,
785
value, self._resolve_references(refs))))
788
def _parse_header_from_bytes(self, bytes):
789
"""Parse the header from a region of bytes.
791
:param bytes: The data to parse.
792
:return: An offset, data tuple such as readv yields, for the unparsed
793
data. (which may length 0).
795
signature = bytes[0:len(self._signature())]
796
if not signature == self._signature():
797
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
798
lines = bytes[len(self._signature()):].splitlines()
799
options_line = lines[0]
800
if not options_line.startswith(_OPTION_NODE_REFS):
801
raise errors.BadIndexOptions(self)
803
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
805
raise errors.BadIndexOptions(self)
806
options_line = lines[1]
807
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
808
raise errors.BadIndexOptions(self)
810
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
812
raise errors.BadIndexOptions(self)
813
options_line = lines[2]
814
if not options_line.startswith(_OPTION_LEN):
815
raise errors.BadIndexOptions(self)
817
self._key_count = int(options_line[len(_OPTION_LEN):])
819
raise errors.BadIndexOptions(self)
820
# calculate the bytes we have processed
821
header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
823
self._parsed_bytes(0, None, header_end, None)
824
# setup parsing state
825
self._expected_elements = 3 + self._key_length
826
# raw data keyed by offset
827
self._keys_by_offset = {}
828
# keys with the value and node references
829
self._bisect_nodes = {}
830
return header_end, bytes[header_end:]
832
def _parse_region(self, offset, data):
833
"""Parse node data returned from a readv operation.
835
:param offset: The byte offset the data starts at.
836
:param data: The data to parse.
840
end = offset + len(data)
843
# Trivial test - if the current index's end is within the
844
# low-matching parsed range, we're done.
845
index = self._parsed_byte_index(high_parsed)
846
if end < self._parsed_byte_map[index][1]:
848
# print "[%d:%d]" % (offset, end), \
849
# self._parsed_byte_map[index:index + 2]
850
high_parsed, last_segment = self._parse_segment(
851
offset, data, end, index)
855
def _parse_segment(self, offset, data, end, index):
856
"""Parse one segment of data.
858
:param offset: Where 'data' begins in the file.
859
:param data: Some data to parse a segment of.
860
:param end: Where data ends
861
:param index: The current index into the parsed bytes map.
862
:return: True if the parsed segment is the last possible one in the
864
:return: high_parsed_byte, last_segment.
865
high_parsed_byte is the location of the highest parsed byte in this
866
segment, last_segment is True if the parsed segment is the last
867
possible one in the data block.
869
# default is to use all data
871
# accomodate overlap with data before this.
872
if offset < self._parsed_byte_map[index][1]:
873
# overlaps the lower parsed region
874
# skip the parsed data
875
trim_start = self._parsed_byte_map[index][1] - offset
876
# don't trim the start for \n
877
start_adjacent = True
878
elif offset == self._parsed_byte_map[index][1]:
879
# abuts the lower parsed region
882
# do not trim anything
883
start_adjacent = True
885
# does not overlap the lower parsed region
888
# but trim the leading \n
889
start_adjacent = False
890
if end == self._size:
891
# lines up to the end of all data:
894
# do not strip to the last \n
897
elif index + 1 == len(self._parsed_byte_map):
898
# at the end of the parsed data
901
# but strip to the last \n
904
elif end == self._parsed_byte_map[index + 1][0]:
905
# buts up against the next parsed region
908
# do not strip to the last \n
911
elif end > self._parsed_byte_map[index + 1][0]:
912
# overlaps into the next parsed region
913
# only consider the unparsed data
914
trim_end = self._parsed_byte_map[index + 1][0] - offset
915
# do not strip to the last \n as we know its an entire record
917
last_segment = end < self._parsed_byte_map[index + 1][1]
919
# does not overlap into the next region
922
# but strip to the last \n
925
# now find bytes to discard if needed
926
if not start_adjacent:
927
# work around python bug in rfind
928
if trim_start is None:
929
trim_start = data.find('\n') + 1
931
trim_start = data.find('\n', trim_start) + 1
932
if not (trim_start != 0):
933
raise AssertionError('no \n was present')
934
# print 'removing start', offset, trim_start, repr(data[:trim_start])
936
# work around python bug in rfind
938
trim_end = data.rfind('\n') + 1
940
trim_end = data.rfind('\n', None, trim_end) + 1
941
if not (trim_end != 0):
942
raise AssertionError('no \n was present')
943
# print 'removing end', offset, trim_end, repr(data[trim_end:])
944
# adjust offset and data to the parseable data.
945
trimmed_data = data[trim_start:trim_end]
946
if not (trimmed_data):
947
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
948
% (trim_start, trim_end, offset, offset + len(data)))
951
# print "parsing", repr(trimmed_data)
952
# splitlines mangles the \r delimiters.. don't use it.
953
lines = trimmed_data.split('\n')
956
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
957
for key, value in nodes:
958
self._bisect_nodes[key] = value
959
self._parsed_bytes(offset, first_key,
960
offset + len(trimmed_data), last_key)
961
return offset + len(trimmed_data), last_segment
963
def _parse_lines(self, lines, pos):
    """Parse index records from lines, updating bisect bookkeeping.

    Each non-empty line is one serialised node record
    (KEY NULL ABSENT? NULL REFERENCES NULL VALUE); an empty line is the
    file trailer.

    :param lines: The record lines (the result of splitting a region of
        the index file on '\\n') to parse.
    :param pos: The byte offset in the index file at which lines[0]
        starts.
    :return: A tuple (first_key, last_key, nodes, trailers) where nodes
        is a list of (key, node_value) for records that are present, and
        trailers counts the empty trailer lines encountered.
    """
    key = None
    first_key = None
    trailers = 0
    nodes = []
    for line in lines:
        if line == '':
            # Empty line: must be the trailer at the very end of the file.
            if self._size:
                if not (self._size == pos + 1):
                    raise AssertionError("%s %s" % (self._size, pos))
            trailers += 1
            continue
        elements = line.split('\0')
        if len(elements) != self._expected_elements:
            raise errors.BadIndexData(self)
        # keys are tuples. Each element is a string that may occur many
        # times, so we intern them to save space. AB, RC, 200807
        key = tuple([intern(element) for element in elements[:self._key_length]])
        if first_key is None:
            first_key = key
        absent, references, value = elements[-3:]
        ref_lists = []
        for ref_string in references.split('\t'):
            ref_lists.append(tuple([
                int(ref) for ref in ref_string.split('\r') if ref
                ]))
        ref_lists = tuple(ref_lists)
        self._keys_by_offset[pos] = (key, absent, ref_lists, value)
        pos += len(line) + 1 # +1 for the \n
        if absent:
            # Tombstone record: tracked by offset but not reported as a node.
            continue
        if self.node_ref_lists:
            node_value = (value, ref_lists)
        else:
            node_value = value
        nodes.append((key, node_value))
        # print "parsed ", key
    return first_key, key, nodes, trailers
def _parsed_bytes(self, start, start_key, end, end_key):
    """Mark the bytes from start to end as parsed.

    Calling self._parsed_bytes(1,2) will mark one byte (the one at offset
    1) as parsed.

    :param start: The start of the parsed region.
    :param start_key: The first key of the parsed region.
    :param end: The end of the parsed region.
    :param end_key: The last key of the parsed region.
    """
    index = self._parsed_byte_index(start)
    new_value = (start, end)
    new_key = (start_key, end_key)
    if index == -1:
        # first range parsed is always the beginning.
        self._parsed_byte_map.insert(index, new_value)
        self._parsed_key_map.insert(index, new_key)
        return
    # four cases:
    # new region
    # extend lower region
    # extend higher region
    # combine two regions
    if (index + 1 < len(self._parsed_byte_map) and
        self._parsed_byte_map[index][1] == start and
        self._parsed_byte_map[index + 1][0] == end):
        # combine two regions
        self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
            self._parsed_byte_map[index + 1][1])
        self._parsed_key_map[index] = (self._parsed_key_map[index][0],
            self._parsed_key_map[index + 1][1])
        del self._parsed_byte_map[index + 1]
        del self._parsed_key_map[index + 1]
    elif self._parsed_byte_map[index][1] == start:
        # extend the lower entry
        self._parsed_byte_map[index] = (
            self._parsed_byte_map[index][0], end)
        self._parsed_key_map[index] = (
            self._parsed_key_map[index][0], end_key)
    elif (index + 1 < len(self._parsed_byte_map) and
        self._parsed_byte_map[index + 1][0] == end):
        # extend the higher entry
        self._parsed_byte_map[index + 1] = (
            start, self._parsed_byte_map[index + 1][1])
        self._parsed_key_map[index + 1] = (
            start_key, self._parsed_key_map[index + 1][1])
    else:
        # new entry
        self._parsed_byte_map.insert(index + 1, new_value)
        self._parsed_key_map.insert(index + 1, new_key)
def _read_and_parse(self, readv_ranges):
    """Read the ranges and parse the resulting data.

    :param readv_ranges: A prepared readv range list.
    """
    if not readv_ranges:
        return
    if self._nodes is None and self._bytes_read * 2 >= self._size:
        # We've already read more than 50% of the file and we are about to
        # request more data, just _buffer_all() and be done
        self._buffer_all()
        return
    readv_data = self._transport.readv(self._name, readv_ranges, True,
        self._size)
    # parse
    for offset, data in readv_data:
        self._bytes_read += len(data)
        if offset == 0 and len(data) == self._size:
            # We read the whole range, most likely because the
            # Transport upcast our readv ranges into one long request
            # for enough total data to grab the whole index.
            self._buffer_all(StringIO(data))
            return
        if self._bisect_nodes is None:
            # this must be the start
            if not (offset == 0):
                raise AssertionError()
            offset, data = self._parse_header_from_bytes(data)
        # print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
        self._parse_region(offset, data)
def _signature(self):
    """The file signature for this index type."""
    return _SIGNATURE
def validate(self):
    """Validate that everything in the index can be accessed."""
    # iter_all_entries validates completely at the moment, so just do that.
    for node in self.iter_all_entries():
        pass
class CombinedGraphIndex(object):
    """A GraphIndex made up from smaller GraphIndices.

    The backing indices must implement GraphIndex, and are presumed to be
    static data.

    Queries against the combined index will be made against the first index,
    and then the second and so on. The order of index's can thus influence
    performance significantly. For example, if one index is on local disk and a
    second on a remote server, the local disk index should be before the other
    in the index list.
    """

    def __init__(self, indices):
        """Create a CombinedGraphIndex backed by indices.

        :param indices: An ordered list of indices to query for data.
        """
        self._indices = indices

    def __repr__(self):
        return "%s(%s)" % (
                self.__class__.__name__,
                ', '.join(map(repr, self._indices)))

    @symbol_versioning.deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See graph._StackedParentsProvider.get_parents.

        This implementation thunks the graph.Graph.get_parents api across to
        GraphIndex.

        :param revision_ids: An iterable of graph keys for this graph.
        :return: A list of parent details for each key in revision_ids.
            Each parent details will be one of:
             * None when the key was missing
             * (NULL_REVISION,) when the key has no parents.
             * (parent_key, parent_key...) otherwise.
        """
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map"""
        search_keys = set(keys)
        if NULL_REVISION in search_keys:
            search_keys.discard(NULL_REVISION)
            found_parents = {NULL_REVISION:[]}
        else:
            found_parents = {}
        for index, key, value, refs in self.iter_entries(search_keys):
            # An empty first reference list means "no parents": map it to
            # the NULL_REVISION sentinel expected by callers.
            parents = refs[0]
            if not parents:
                parents = (NULL_REVISION,)
            found_parents[key] = parents
        return found_parents

    def insert_index(self, pos, index):
        """Insert a new index in the list of indices to query.

        :param pos: The position to insert the index.
        :param index: The index to insert.
        """
        self._indices.insert(pos, index)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :return: An iterable of (index, key, reference_lists, value).
            There is no defined order for the result iteration - it will be
            in the most efficient order for the index.
        """
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_all_entries():
                if node[1] not in seen_keys:
                    yield node
                    seen_keys.add(node[1])

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, reference_lists, value). There is
            no defined order for the result iteration - it will be in the most
            efficient order for the index.
        """
        keys = set(keys)
        for index in self._indices:
            # Stop as soon as every requested key has been found.
            if not keys:
                return
            for node in index.iter_entries(keys):
                keys.remove(node[1])
                yield node

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Duplicate keys across child indices are presumed to have the same
        value and are only reported once.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        keys = set(keys)
        if not keys:
            return
        seen_keys = set()
        for index in self._indices:
            for node in index.iter_entries_prefix(keys):
                if node[1] in seen_keys:
                    continue
                seen_keys.add(node[1])
                yield node

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For CombinedGraphIndex this is approximated by the sum of the keys of
        the child indices. As child indices may have duplicate keys this can
        have a maximum error of the number of child indices * largest number of
        keys in any index.
        """
        return sum((index.key_count() for index in self._indices), 0)

    def validate(self):
        """Validate that everything in the index can be accessed."""
        for index in self._indices:
            index.validate()
class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be immediately
    available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, node_refs, value) entries to add.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        :return: An iterable of (index, key, reference_lists, value). There is
            no defined order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There is
            no defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                # node[0] is the 'absent' marker; skip deleted entries.
                if not node[0]:
                    yield self, key, node[2], node[1]
        else:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        # XXX: To much duplication with the GraphIndex class; consider finding
        # a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            for key in keys:
                # sanity check
                if key[0] is None:
                    raise errors.BadIndexKey(key)
                if len(key) != self._key_length:
                    raise errors.BadIndexKey(key)
                node = self._nodes[key]
                if node[0]:
                    # absent entry
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            # find what it refers to:
            key_dict = nodes_by_key
            elements = list(key)
            # find the subdict to return
            try:
                while len(elements) and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existant lookup.
                continue
            if len(elements):
                # A wildcard tail: walk every subtree under this prefix.
                dicts = [key_dict]
                while dicts:
                    key_dict = dicts.pop(-1)
                    # can't be empty or would not exist
                    item, value = key_dict.iteritems().next()
                    if type(value) == dict:
                        # push keys
                        dicts.extend(key_dict.itervalues())
                    else:
                        # yield keys
                        for value in key_dict.itervalues():
                            yield (self, ) + value
            else:
                yield (self, ) + key_dict

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._keys)

    def validate(self):
        """In memory index's have no known corruption at the moment."""
class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
        add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The GraphIndex to adapt.
        :param prefix: The prefix (a key tuple fragment) to prepend/strip.
        :param missing_key_length: The number of key elements the adapted
            index has beyond this adapter's key length.
        :param add_nodes_callback: Called with translated nodes when nodes
            are added via this adapter; None disables adding.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,)*missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, node_refs, value) entries to add.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        translated_nodes = []
        try:
            # Add prefix_key to each reference node_refs is a tuple of tuples,
            # so split it apart, and add prefix_key to the internal reference
            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                        for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                    adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            for (key, value) in nodes:
                translated_nodes.append((self.prefix + key, value))
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it."""
        for node in an_iter:
            # cross checks
            if node[1][:self.prefix_len] != self.prefix:
                raise errors.BadIndexData(self)
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise errors.BadIndexData(self)
            yield node[0], node[1][self.prefix_len:], node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, reference_lists, value). There is
            no defined order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash order).
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There is
            no defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this case).
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        return len(list(self.iter_all_entries()))

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()