~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/index.py

Committer: Martin Pool
Date: 2005-04-26 05:20:17 UTC
Revision ID: mbp@sourcefrog.net-20050426052016-8445d0f4fec584d0

- move all TODO items into ./TODO

files added:
build-api

bzrlib/mdiff.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/tests.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/faq.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quickref.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

test.sh

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/elementtree/ => elementtree/

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/osutils.py

bzrlib/revision.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

elementtree/ElementTree.py

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Indexing facilities."""

# Public API of this module: the names exposed by a star-import.
__all__ = [

'CombinedGraphIndex',

'GraphIndex',

'GraphIndexBuilder',

'GraphIndexPrefixAdapter',

'InMemoryGraphIndex',

]

from bisect import bisect_right

from cStringIO import StringIO

import re

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import trace

from bzrlib.bisect_multi import bisect_multi_bytes

from bzrlib.revision import NULL_REVISION

from bzrlib.trace import mutter

""")

from bzrlib import (

debug,

errors,

symbol_versioning,

)

# (offset, length) readv request used when only the index header is wanted.
_HEADER_READV = (0, 200)
# Prefixes of the three option lines that follow the signature line.
_OPTION_KEY_ELEMENTS = "key_elements="
_OPTION_LEN = "len="
_OPTION_NODE_REFS = "node_ref_lists="
# First line emitted by GraphIndexBuilder.finish().
_SIGNATURE = "Bazaar Graph Index 1\n"
# Matches any character that is not allowed inside a key element.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Matches any character that is not allowed inside a node value.
_newline_null_re = re.compile('[\n\0]')

class GraphIndexBuilder(object):

"""A builder that can build a GraphIndex.

The resulting graph has the structure:

_SIGNATURE OPTIONS NODES NEWLINE

_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE

OPTIONS := 'node_ref_lists=' DIGITS NEWLINE

NODES := NODE*

NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE

KEY := Not-whitespace-utf8

ABSENT := 'a'

REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}

REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?

REFERENCE := DIGITS ; digits is the byte offset in the index of the

; referenced key.

VALUE := no-newline-no-null-bytes

"""

def __init__(self, reference_lists=0, key_elements=1):

"""Create a GraphIndex builder.

:param reference_lists: The number of node references lists for each

entry.

:param key_elements: The number of bytestrings in each key.

"""

self.reference_lists = reference_lists

self._keys = set()

# A dict of {key: (absent, ref_lists, value)}

self._nodes = {}

self._nodes_by_key = None

self._key_length = key_elements

self._optimize_for_size = False

def _check_key(self, key):
    """Validate key, raising BadIndexKey when it cannot be used here.

    A usable key is exactly a tuple holding self._key_length elements, each
    of them non-empty and free of whitespace and NUL characters.
    """
    if type(key) != tuple or self._key_length != len(key):
        raise errors.BadIndexKey(key)
    for part in key:
        if not part:
            raise errors.BadIndexKey(part)
        if _whitespace_re.search(part) is not None:
            raise errors.BadIndexKey(part)

100

def _get_nodes_by_key(self):

101

if self._nodes_by_key is None:

102

nodes_by_key = {}

103

if self.reference_lists:

104

for key, (absent, references, value) in self._nodes.iteritems():

105

if absent:

106

continue

107

key_dict = nodes_by_key

108

for subkey in key[:-1]:

109

key_dict = key_dict.setdefault(subkey, {})

110

key_dict[key[-1]] = key, value, references

111

else:

112

for key, (absent, references, value) in self._nodes.iteritems():

113

if absent:

114

continue

115

key_dict = nodes_by_key

116

for subkey in key[:-1]:

117

key_dict = key_dict.setdefault(subkey, {})

118

key_dict[key[-1]] = key, value

119

self._nodes_by_key = nodes_by_key

120

return self._nodes_by_key

121

122

def _update_nodes_by_key(self, key, value, node_refs):

123

"""Update the _nodes_by_key dict with a new key.

124

125

For a key of (foo, bar, baz) create

126

_nodes_by_key[foo][bar][baz] = key_value

127

"""

128

if self._nodes_by_key is None:

129

return

130

key_dict = self._nodes_by_key

131

if self.reference_lists:

132

key_value = key, value, node_refs

133

else:

134

key_value = key, value

135

for subkey in key[:-1]:

136

key_dict = key_dict.setdefault(subkey, {})

137

key_dict[key[-1]] = key_value

138

139

def _check_key_ref_value(self, key, references, value):
    """Validate a prospective node and normalise its references.

    :param key: A key tuple; it has to pass _check_key.
    :param references: An iterable of reference lists, for instance
        [[(ref, key)], [(ref, key), (other, key)]].  Its length must equal
        self.reference_lists.
    :param value: The value for the key; newlines and null characters are
        rejected.
    :return: (node_refs, absent_references)
        node_refs is 'references' with every list turned into a tuple.
        absent_references lists the referenced keys that are not in
        self._nodes; a key referenced from several lists appears several
        times.
    :raises BadIndexKey: for an invalid key or referenced key.
    :raises BadIndexValue: for an invalid value or a wrong number of
        reference lists.
    """
    self._check_key(key)
    if _newline_null_re.search(value) is not None:
        raise errors.BadIndexValue(value)
    if len(references) != self.reference_lists:
        raise errors.BadIndexValue(references)
    packed = []
    missing = []
    for ref_list in references:
        for ref in ref_list:
            if ref in self._nodes:
                # anything stored in self._nodes was checked when added
                continue
            self._check_key(ref)
            missing.append(ref)
        packed.append(tuple(ref_list))
    return tuple(packed), missing

172

173

def add_node(self, key, value, references=()):
    """Insert one node into the index under construction.

    :param key: The node's key: a tuple of whitespace-free utf8
        bytestrings, as many as the key length of this index.
    :param value: Bytes stored with the key; \0 and \n are not allowed.
    :param references: An iterable of iterables of keys, each key being a
        reference to another node.
    :raises BadIndexDuplicateKey: when key was already added as a real
        (non-absent) node.
    """
    node_refs, absent_references = self._check_key_ref_value(
        key, references, value)
    if key in self._nodes:
        if self._nodes[key][0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
    # Referenced-but-unknown keys get an 'absent' placeholder node.  The
    # list may repeat keys; re-assigning the same placeholder is harmless.
    for missing in absent_references:
        self._nodes[missing] = ('a', (), '')
    self._nodes[key] = ('', node_refs, value)
    self._keys.add(key)
    if self._key_length > 1 and self._nodes_by_key is not None:
        self._update_nodes_by_key(key, value, node_refs)

196

197

def finish(self):
    """Serialise every node added so far into the on-disk index format.

    The output is the signature line, the three option lines, one line per
    node (sorted by key, absent placeholders included) and a final empty
    line.  References are written as zero-padded byte offsets of the
    referenced node's line.

    :return: A StringIO containing the complete index text.
    :raises BzrError: if reference lists are in use and the produced text
        does not have the length predicted by the pre-pass.
    """
    lines = [_SIGNATURE]
    lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
    lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
    lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')
    prefix_length = sum(len(x) for x in lines)
    # references are byte offsets. To avoid having to do nasty
    # polynomial work to resolve offsets (references to later in the
    # file cannot be determined until all the inbetween references have
    # been calculated too) we pad the offsets with 0's to make them be
    # of consistent length. Using binary offsets would break the trivial
    # file parsing.
    # to calculate the width of zero's needed we do three passes:
    # one to gather all the non-reference data and the number of references.
    # one to pad all the data with reference-length and determine entry
    # addresses.
    # One to serialise.

    # forward sorted by key. In future we may consider topological sorting,
    # at the cost of table scans for direct lookup, or a second index for
    # direct lookup
    nodes = sorted(self._nodes.items())
    # if we do not prepass, we don't know how long it will be up front.
    expected_bytes = None
    # we only need to pre-pass if we have reference lists at all.
    if self.reference_lists:
        key_offset_info = []
        non_ref_bytes = prefix_length
        total_references = 0
        # TODO use simple multiplication for the constants in this loop.
        for key, (absent, references, value) in nodes:
            # record the offset known *so far* for this key:
            # the non reference bytes to date, and the total references to
            # date - saves reaccumulating on the second pass
            key_offset_info.append((key, non_ref_bytes, total_references))
            # key is literal, value is literal, there are 3 null's, 1 NL
            # key is variable length tuple, \x00 between elements
            non_ref_bytes += sum(len(element) for element in key)
            if self._key_length > 1:
                non_ref_bytes += self._key_length - 1
            # value is literal bytes, there are 3 null's, 1 NL.
            non_ref_bytes += len(value) + 3 + 1
            # one byte for absent if set.
            if absent:
                non_ref_bytes += 1
            elif self.reference_lists:
                # (ref_lists -1) tabs
                non_ref_bytes += self.reference_lists - 1
                # (ref-1 cr's per ref_list)
                for ref_list in references:
                    # how many references across the whole file?
                    total_references += len(ref_list)
                    # accrue reference separators
                    if ref_list:
                        non_ref_bytes += len(ref_list) - 1
        # how many digits are needed to represent the total byte count?
        # (widening the references grows the file, so iterate until the
        # chosen width can express the resulting size)
        digits = 1
        possible_total_bytes = non_ref_bytes + total_references*digits
        while 10 ** digits < possible_total_bytes:
            digits += 1
            possible_total_bytes = non_ref_bytes + total_references*digits
        expected_bytes = possible_total_bytes + 1 # terminating newline
        # resolve key addresses.
        key_addresses = {}
        for key, non_ref_bytes, total_references in key_offset_info:
            key_addresses[key] = non_ref_bytes + total_references*digits
        # serialise
        format_string = '%%0%sd' % digits
    for key, (absent, references, value) in nodes:
        flattened_references = []
        for ref_list in references:
            ref_addresses = []
            for reference in ref_list:
                ref_addresses.append(format_string % key_addresses[reference])
            # references within one list are CR separated
            flattened_references.append('\r'.join(ref_addresses))
        string_key = '\x00'.join(key)
        # KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE, with the
        # reference lists TAB separated.
        lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
            '\t'.join(flattened_references), value))
    lines.append('\n')
    result = StringIO(''.join(lines))
    # sanity check the pre-pass arithmetic against what was really written
    if expected_bytes and len(result.getvalue()) != expected_bytes:
        raise errors.BzrError('Failed index creation. Internal error:'
            ' mismatched output length and expected length: %d %d' %
            (len(result.getvalue()), expected_bytes))
    return result

282

283

def set_optimize(self, for_size=True):

284

"""Change how the builder tries to optimize the result.

285

286

:param for_size: Tell the builder to try and make the index as small as

287

possible.

288

:return: None

289

"""

290

# GraphIndexBuilder itself doesn't pay attention to the flag yet, but

291

# other builders do.

292

self._optimize_for_size = for_size

293

294

295

class GraphIndex(object):

296

"""An index for data with embedded graphs.

297

298

The index maps keys to a list of key reference lists, and a value.

299

Each node has the same number of key reference lists. Each key reference

300

list can be empty or an arbitrary length. The value is an opaque NULL

301

terminated string without any newlines. The storage of the index is

302

hidden in the interface: keys and key references are always tuples of

303

bytestrings, never the internal representation (e.g. dictionary offsets).

304

305

It is presumed that the index will not be mutated - it is static data.

306

307

Successive iter_all_entries calls will read the entire index each time.

308

Additionally, iter_entries calls will read the index linearly until the

309

desired keys are found. XXX: This must be fixed before the index is

310

suitable for production use. :XXX

311

"""

312

313

def __init__(self, transport, name, size):

314

"""Open an index called name on transport.

315

316

:param transport: A bzrlib.transport.Transport.

317

:param name: A path to provide to transport API calls.

318

:param size: The size of the index in bytes. This is used for bisection

319

logic to perform partial index reads. While the size could be

320

obtained by statting the file this introduced an additional round

321

trip as well as requiring stat'able transports, both of which are

322

avoided by having it supplied. If size is None, then bisection

323

support will be disabled and accessing the index will just stream

324

all the data.

325

"""

326

self._transport = transport

327

self._name = name

328

# Becomes a dict of key:(value, reference-list-byte-locations) used by

329

# the bisection interface to store parsed but not resolved keys.

330

self._bisect_nodes = None

331

# Becomes a dict of key:(value, reference-list-keys) which are ready to

332

# be returned directly to callers.

333

self._nodes = None

334

# a sorted list of slice-addresses for the parsed bytes of the file.

335

# e.g. (0,1) would mean that byte 0 is parsed.

336

self._parsed_byte_map = []

337

# a sorted list of keys matching each slice address for parsed bytes

338

# e.g. (None, 'foo@bar') would mean that the first byte contained no

339

# key, and the end byte of the slice is the of the data for 'foo@bar'

340

self._parsed_key_map = []

341

self._key_count = None

342

self._keys_by_offset = None

343

self._nodes_by_key = None

344

self._size = size

345

# The number of bytes we've read so far in trying to process this file

346

self._bytes_read = 0

347

348

def __eq__(self, other):

349

"""Equal when self and other were created with the same parameters."""

350

return (

351

type(self) == type(other) and

352

self._transport == other._transport and

353

self._name == other._name and

354

self._size == other._size)

355

356

def __ne__(self, other):

357

return not self.__eq__(other)

358

359

def __repr__(self):
    """Show the class name and the absolute location of the index."""
    location = self._transport.abspath(self._name)
    return "%s(%r)" % (self.__class__.__name__, location)

362

363

def _buffer_all(self, stream=None):
    """Read and parse the whole index into memory.

    Does nothing when the index has already been buffered.  Otherwise it
    sets self._keys_by_offset (the raw parsed rows), self._nodes (the
    ready-to-return entries), self._keys, self._expected_elements and
    resets self._nodes_by_key.

    :param stream: An optional file-like object positioned at the start of
        the index.  When None the file is fetched with
        self._transport.get(self._name).
    :raises BadIndexData: when the data does not end with exactly one
        trailer line.
    """
    if self._nodes is not None:
        # We already did this
        return
    if 'index' in debug.debug_flags:
        mutter('Reading entire index %s', self._transport.abspath(self._name))
    if stream is None:
        stream = self._transport.get(self._name)
    self._read_prefix(stream)
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # ready-to-return key:value or key:value, node_ref_lists
    self._nodes = {}
    self._nodes_by_key = None
    # offsets handed to _parse_lines are relative to the end of the header
    pos = stream.tell()
    lines = stream.read().split('\n')
    # drop the empty string split() leaves after the final newline
    del lines[-1]
    _, _, _, trailers = self._parse_lines(lines, pos)
    for key, absent, references, value in self._keys_by_offset.itervalues():
        if absent:
            continue
        # resolve references:
        if self.node_ref_lists:
            node_value = (value, self._resolve_references(references))
        else:
            node_value = value
        self._nodes[key] = node_value
    # cache the keys for quick set intersections
    self._keys = set(self._nodes)
    if trailers != 1:
        # there must be one line - the empty trailer line.
        raise errors.BadIndexData(self)

402

403

def _get_nodes_by_key(self):

404

if self._nodes_by_key is None:

405

nodes_by_key = {}

406

if self.node_ref_lists:

407

for key, (value, references) in self._nodes.iteritems():

408

key_dict = nodes_by_key

409

for subkey in key[:-1]:

410

key_dict = key_dict.setdefault(subkey, {})

411

key_dict[key[-1]] = key, value, references

412

else:

413

for key, value in self._nodes.iteritems():

414

key_dict = nodes_by_key

415

for subkey in key[:-1]:

416

key_dict = key_dict.setdefault(subkey, {})

417

key_dict[key[-1]] = key, value

418

self._nodes_by_key = nodes_by_key

419

return self._nodes_by_key

420

421

def iter_all_entries(self):
    """Yield every entry held by the index.

    The whole index is buffered first if that has not happened yet.

    :return: An iterable of (index, key, value) when the index has no
        reference lists (which keeps the API compatible with plain
        key:value indices), otherwise of
        (index, key, value, reference_lists).  The iteration order is
        undefined: whatever is cheapest for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    if not self.node_ref_lists:
        for key, value in self._nodes.iteritems():
            yield self, key, value
        return
    for key, (value, node_ref_lists) in self._nodes.iteritems():
        yield self, key, value, node_ref_lists

441

442

def _read_prefix(self, stream):
    """Consume and validate the index header from stream.

    Reads the signature and then the node_ref_lists, key_elements and len
    option lines, storing the parsed integers in self.node_ref_lists,
    self._key_length and self._key_count respectively.

    :raises BadIndexFormatSignature: when the signature does not match.
    :raises BadIndexOptions: when an option line has the wrong prefix or a
        non-integer value.
    """
    signature = stream.read(len(self._signature()))
    if signature != self._signature():
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    # (option prefix, attribute receiving the parsed value), in file order
    option_fields = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for prefix, attribute in option_fields:
        options_line = stream.readline()
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            # [:-1] drops the trailing newline kept by readline()
            parsed = int(options_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, parsed)

467

468

def _resolve_references(self, references):

469

"""Return the resolved key references for references.

470

471

References are resolved by looking up the location of the key in the

472

_keys_by_offset map and substituting the key name, preserving ordering.

473

474

:param references: An iterable of iterables of key locations. e.g.

475

[[123, 456], [123]]

476

:return: A tuple of tuples of keys.

477

"""

478

node_refs = []

479

for ref_list in references:

480

node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))

481

return tuple(node_refs)

482

483

def _find_index(self, range_map, key):

484

"""Helper for the _parsed_*_index calls.

485

486

Given a range map - [(start, end), ...], finds the index of the range

487

in the map for key if it is in the map, and if it is not there, the

488

immediately preceeding range in the map.

489

"""

490

result = bisect_right(range_map, key) - 1

491

if result + 1 < len(range_map):

492

# check the border condition, it may be in result + 1

493

if range_map[result + 1][0] == key[0]:

494

return result + 1

495

return result

496

497

def _parsed_byte_index(self, offset):

498

"""Return the index of the entry immediately before offset.

499

500

e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that

501

there is one unparsed byte (the 11th, addressed as[10]). then:

502

asking for 0 will return 0

503

asking for 10 will return 0

504

asking for 11 will return 1

505

asking for 12 will return 1

506

"""

507

key = (offset, 0)

508

return self._find_index(self._parsed_byte_map, key)

509

510

def _parsed_key_index(self, key):

511

"""Return the index of the entry immediately before key.

512

513

e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,

514

meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive

515

have been parsed, then:

516

asking for '' will return 0

517

asking for 'a' will return 0

518

asking for 'b' will return 1

519

asking for 'e' will return 1

520

"""

521

search_key = (key, None)

522

return self._find_index(self._parsed_key_map, search_key)

523

524

def _is_parsed(self, offset):

525

"""Returns True if offset has been parsed."""

526

index = self._parsed_byte_index(offset)

527

if index == len(self._parsed_byte_map):

528

return offset < self._parsed_byte_map[index - 1][1]

529

start, end = self._parsed_byte_map[index]

530

return offset >= start and offset < end

531

532

def _iter_entries_from_total_buffer(self, keys):

533

"""Iterate over keys when the entire index is parsed."""

534

keys = keys.intersection(self._keys)

535

if self.node_ref_lists:

536

for key in keys:

537

value, node_refs = self._nodes[key]

538

yield self, key, value, node_refs

539

else:

540

for key in keys:

541

yield self, key, self._nodes[key]

542

543

def iter_entries(self, keys):

544

"""Iterate over keys within the index.

545

546

:param keys: An iterable providing the keys to be retrieved.

547

:return: An iterable as per iter_all_entries, but restricted to the

548

keys supplied. No additional keys will be returned, and every

549

key supplied that is in the index will be returned.

550

"""

551

keys = set(keys)

552

if not keys:

553

return []

554

if self._size is None and self._nodes is None:

555

self._buffer_all()

556

557

# We fit about 20 keys per minimum-read (4K), so if we are looking for

558

# more than 1/20th of the index its likely (assuming homogenous key

559

# spread) that we'll read the entire index. If we're going to do that,

560

# buffer the whole thing. A better analysis might take key spread into

561

# account - but B+Tree indices are better anyway.

562

# We could look at all data read, and use a threshold there, which will

563

# trigger on ancestry walks, but that is not yet fully mapped out.

564

if self._nodes is None and len(keys) * 20 > self.key_count():

565

self._buffer_all()

566

if self._nodes is not None:

567

return self._iter_entries_from_total_buffer(keys)

568

else:

569

return (result[1] for result in bisect_multi_bytes(

570

self._lookup_keys_via_location, self._size, keys))

571

572

def iter_entries_prefix(self, keys):

573

"""Iterate over keys within the index using prefix matching.

574

575

Prefix matching is applied within the tuple of a key, not to within

576

the bytestring of each key element. e.g. if you have the keys ('foo',

577

'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then

578

only the former key is returned.

579

580

WARNING: Note that this method currently causes a full index parse

581

unconditionally (which is reasonably appropriate as it is a means for

582

thunking many small indices into one larger one and still supplies

583

iter_all_entries at the thunk layer).

584

585

:param keys: An iterable providing the key prefixes to be retrieved.

586

Each key prefix takes the form of a tuple the length of a key, but

587

with the last N elements 'None' rather than a regular bytestring.

588

The first element cannot be 'None'.

589

:return: An iterable as per iter_all_entries, but restricted to the

590

keys with a matching prefix to those supplied. No additional keys

591

will be returned, and every match that is in the index will be

592

returned.

593

"""

594

keys = set(keys)

595

if not keys:

596

return

597

# load data - also finds key lengths

598

if self._nodes is None:

599

self._buffer_all()

600

if self._key_length == 1:

601

for key in keys:

602

# sanity check

603

if key[0] is None:

604

raise errors.BadIndexKey(key)

605

if len(key) != self._key_length:

606

raise errors.BadIndexKey(key)

607

if self.node_ref_lists:

608

value, node_refs = self._nodes[key]

609

yield self, key, value, node_refs

610

else:

611

yield self, key, self._nodes[key]

612

return

613

nodes_by_key = self._get_nodes_by_key()

614

for key in keys:

615

# sanity check

616

if key[0] is None:

617

raise errors.BadIndexKey(key)

618

if len(key) != self._key_length:

619

raise errors.BadIndexKey(key)

620

# find what it refers to:

621

key_dict = nodes_by_key

622

elements = list(key)

623

# find the subdict whose contents should be returned.

624

try:

625

while len(elements) and elements[0] is not None:

626

key_dict = key_dict[elements[0]]

627

elements.pop(0)

628

except KeyError:

629

# a non-existant lookup.

630

continue

631

if len(elements):

632

dicts = [key_dict]

633

while dicts:

634

key_dict = dicts.pop(-1)

635

# can't be empty or would not exist

636

item, value = key_dict.iteritems().next()

637

if type(value) == dict:

638

# push keys

639

dicts.extend(key_dict.itervalues())

640

else:

641

# yield keys

642

for value in key_dict.itervalues():

643

# each value is the key:value:node refs tuple

644

# ready to yield.

645

yield (self, ) + value

646

else:

647

# the last thing looked up was a terminal element

648

yield (self, ) + key_dict

649

650

def key_count(self):

651

"""Return an estimate of the number of keys in this index.

652

653

For GraphIndex the estimate is exact.

654

"""

655

if self._key_count is None:

656

self._read_and_parse([_HEADER_READV])

657

return self._key_count

658

659

def _lookup_keys_via_location(self, location_keys):
    """Answer bisection probes; the callback given to bisect_multi_bytes.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    :param location_keys: A list of location(byte offset), key tuples.
    :return: A list of (location_key, result) tuples as expected by
        bzrlib.bisect_multi.bisect_multi_bytes.  result is False when the
        key is known not to be in the index, -1 or +1 when the key lies
        before or after the probed location, and otherwise the entry
        tuple (index, key, value[, references]).
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    # -----
    # this progresses in three parts:
    # read data
    # parse it
    # attempt to answer the question from the now in memory data.
    # build the readv request
    # for each location, ask for 800 bytes - much more than rows we've seen
    # anywhere.
    readv_ranges = []
    for location, key in location_keys:
        # can we answer from cache?
        if self._bisect_nodes and key in self._bisect_nodes:
            # We have the key parsed.
            continue
        index = self._parsed_key_index(key)
        if (len(self._parsed_key_map) and
            self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            # the key has been parsed, so no lookup is needed even if its
            # not present.
            continue
        # - if we have examined this part of the file already - yes
        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
            self._parsed_byte_map[index][0] <= location and
            self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            continue
        length = 800
        # never ask for bytes beyond the end of the file
        if location + length > self._size:
            length = self._size - location
        # todo, trim out parsed locations.
        if length > 0:
            readv_ranges.append((location, length))
    # read the header if needed
    if self._bisect_nodes is None:
        readv_ranges.append(_HEADER_READV)
    self._read_and_parse(readv_ranges)
    result = []
    if self._nodes is not None:
        # _read_and_parse triggered a _buffer_all because we requested the
        # whole data range
        for location, key in location_keys:
            if key not in self._nodes: # not present
                result.append(((location, key), False))
            elif self.node_ref_lists:
                value, refs = self._nodes[key]
                result.append(((location, key),
                    (self, key, value, refs)))
            else:
                result.append(((location, key),
                    (self, key, self._nodes[key])))
        return result
    # generate results:
    #  - figure out <, >, missing, present
    #  - result present references so we can return them.
    # keys that we cannot answer until we resolve references
    pending_references = []
    pending_locations = set()
    for location, key in location_keys:
        # can we answer from cache?
        if key in self._bisect_nodes:
            # the key has been parsed, so no lookup is needed
            if self.node_ref_lists:
                # the references may not have been all parsed.
                value, refs = self._bisect_nodes[key]
                wanted_locations = []
                for ref_list in refs:
                    for ref in ref_list:
                        if ref not in self._keys_by_offset:
                            wanted_locations.append(ref)
                if wanted_locations:
                    # defer the answer until the referenced rows are read
                    pending_locations.update(wanted_locations)
                    pending_references.append((location, key))
                    continue
                result.append(((location, key), (self, key,
                    value, self._resolve_references(refs))))
            else:
                result.append(((location, key),
                    (self, key, self._bisect_nodes[key])))
            continue
        else:
            # has the region the key should be in, been parsed?
            index = self._parsed_key_index(key)
            if (self._parsed_key_map[index][0] <= key and
                (self._parsed_key_map[index][1] >= key or
                 # end of the file has been parsed
                 self._parsed_byte_map[index][1] == self._size)):
                result.append(((location, key), False))
                continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        index = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[index][0]:
            direction = -1
        else:
            direction = +1
        result.append(((location, key), direction))
    readv_ranges = []
    # lookup data to resolve references
    for location in pending_locations:
        length = 800
        if location + length > self._size:
            length = self._size - location
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        if length > 0:
            readv_ranges.append((location, length))
    self._read_and_parse(readv_ranges)
    if self._nodes is not None:
        # The _read_and_parse triggered a _buffer_all, grab the data and
        # return it
        for location, key in pending_references:
            value, refs = self._nodes[key]
            result.append(((location, key), (self, key, value, refs)))
        return result
    for location, key in pending_references:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result

800

801

def _parse_header_from_bytes(self, bytes):
    """Parse the index header found at the start of a region of bytes.

    Stores the parsed option values in self.node_ref_lists,
    self._key_length and self._key_count, records the header as parsed and
    initialises the bisection parsing state.

    :param bytes: The data to parse, starting at byte 0 of the index.
    :return: An (offset, data) tuple such as readv yields, covering the
        data that follows the header (which may have length 0).
    :raises BadIndexFormatSignature: when the signature does not match.
    :raises BadIndexOptions: when an option line has the wrong prefix or a
        non-integer value.
    """
    signature = bytes[0:len(self._signature())]
    if signature != self._signature():
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)
    lines = bytes[len(self._signature()):].splitlines()
    # (option prefix, attribute receiving the parsed value), in file order
    option_fields = (
        (_OPTION_NODE_REFS, 'node_ref_lists'),
        (_OPTION_KEY_ELEMENTS, '_key_length'),
        (_OPTION_LEN, '_key_count'),
        )
    for line_number, (prefix, attribute) in enumerate(option_fields):
        options_line = lines[line_number]
        if not options_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            parsed = int(options_line[len(prefix):])
        except ValueError:
            raise errors.BadIndexOptions(self)
        setattr(self, attribute, parsed)
    # calculate the bytes we have processed: the signature, the three
    # option lines and the three newlines that splitlines() removed.
    header_end = (len(signature) + sum(len(line) for line in lines[:3])
        + 3)
    self._parsed_bytes(0, None, header_end, None)
    # setup parsing state
    self._expected_elements = 3 + self._key_length
    # raw data keyed by offset
    self._keys_by_offset = {}
    # keys with the value and node references
    self._bisect_nodes = {}
    return header_end, bytes[header_end:]

844

845

def _parse_region(self, offset, data):

846

"""Parse node data returned from a readv operation.

847

848

:param offset: The byte offset the data starts at.

849

:param data: The data to parse.

850

"""

851

# trim the data.

852

# end first:

853

end = offset + len(data)

854

high_parsed = offset

855

while True:

856

# Trivial test - if the current index's end is within the

857

# low-matching parsed range, we're done.

858

index = self._parsed_byte_index(high_parsed)

859

if end < self._parsed_byte_map[index][1]:

860

return

861

# print "[%d:%d]" % (offset, end), \

862

# self._parsed_byte_map[index:index + 2]

863

high_parsed, last_segment = self._parse_segment(

864

offset, data, end, index)

865

if last_segment:

866

return

867

868

def _parse_segment(self, offset, data, end, index):

869

"""Parse one segment of data.

870

871

:param offset: Where 'data' begins in the file.

872

:param data: Some data to parse a segment of.

873

:param end: Where data ends

874

:param index: The current index into the parsed bytes map.

875

:return: True if the parsed segment is the last possible one in the

876

range of data.

877

:return: high_parsed_byte, last_segment.

878

high_parsed_byte is the location of the highest parsed byte in this

879

segment, last_segment is True if the parsed segment is the last

880

possible one in the data block.

881

"""

882

# default is to use all data

883

trim_end = None

884

# accomodate overlap with data before this.

885

if offset < self._parsed_byte_map[index][1]:

886

# overlaps the lower parsed region

887

# skip the parsed data

888

trim_start = self._parsed_byte_map[index][1] - offset

889

# don't trim the start for \n

890

start_adjacent = True

891

elif offset == self._parsed_byte_map[index][1]:

892

# abuts the lower parsed region

893

# use all data

894

trim_start = None

895

# do not trim anything

896

start_adjacent = True

897

else:

898

# does not overlap the lower parsed region

899

# use all data

900

trim_start = None

901

# but trim the leading \n

902

start_adjacent = False

903

if end == self._size:

904

# lines up to the end of all data:

905

# use it all

906

trim_end = None

907

# do not strip to the last \n

908

end_adjacent = True

909

last_segment = True

910

elif index + 1 == len(self._parsed_byte_map):

911

# at the end of the parsed data

912

# use it all

913

trim_end = None

914

# but strip to the last \n

915

end_adjacent = False

916

last_segment = True

917

elif end == self._parsed_byte_map[index + 1][0]:

918

# buts up against the next parsed region

919

# use it all

920

trim_end = None

921

# do not strip to the last \n

922

end_adjacent = True

923

last_segment = True

924

elif end > self._parsed_byte_map[index + 1][0]:

925

# overlaps into the next parsed region

926

# only consider the unparsed data

927

trim_end = self._parsed_byte_map[index + 1][0] - offset

928

# do not strip to the last \n as we know its an entire record

929

end_adjacent = True

930

last_segment = end < self._parsed_byte_map[index + 1][1]

931

else:

932

# does not overlap into the next region

933

# use it all

934

trim_end = None

935

# but strip to the last \n

936

end_adjacent = False

937

last_segment = True

938

# now find bytes to discard if needed

939

if not start_adjacent:

940

# work around python bug in rfind

941

if trim_start is None:

942

trim_start = data.find('\n') + 1

943

else:

944

trim_start = data.find('\n', trim_start) + 1

945

if not (trim_start != 0):

946

raise AssertionError('no \n was present')

947

# print 'removing start', offset, trim_start, repr(data[:trim_start])

948

if not end_adjacent:

949

# work around python bug in rfind

950

if trim_end is None:

951

trim_end = data.rfind('\n') + 1

952

else:

953

trim_end = data.rfind('\n', None, trim_end) + 1

954

if not (trim_end != 0):

955

raise AssertionError('no \n was present')

956

# print 'removing end', offset, trim_end, repr(data[trim_end:])

957

# adjust offset and data to the parseable data.

958

trimmed_data = data[trim_start:trim_end]

959

if not (trimmed_data):

960

raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'

961

% (trim_start, trim_end, offset, offset + len(data)))

962

if trim_start:

963

offset += trim_start

964

# print "parsing", repr(trimmed_data)

965

# splitlines mangles the \r delimiters.. don't use it.

966

lines = trimmed_data.split('\n')

967

del lines[-1]

968

pos = offset

969

first_key, last_key, nodes, _ = self._parse_lines(lines, pos)

970

for key, value in nodes:

971

self._bisect_nodes[key] = value

972

self._parsed_bytes(offset, first_key,

973

offset + len(trimmed_data), last_key)

974

return offset + len(trimmed_data), last_segment

975

976

def _parse_lines(self, lines, pos):

977

key = None

978

first_key = None

979

trailers = 0

980

nodes = []

981

for line in lines:

982

if line == '':

983

# must be at the end

984

if self._size:

985

if not (self._size == pos + 1):

986

raise AssertionError("%s %s" % (self._size, pos))

987

trailers += 1

988

continue

989

elements = line.split('\0')

990

if len(elements) != self._expected_elements:

991

raise errors.BadIndexData(self)

992

# keys are tuples. Each element is a string that may occur many

993

# times, so we intern them to save space. AB, RC, 200807

994

key = tuple([intern(element) for element in elements[:self._key_length]])

995

if first_key is None:

996

first_key = key

997

absent, references, value = elements[-3:]

998

ref_lists = []

999

for ref_string in references.split('\t'):

1000

ref_lists.append(tuple([

1001

int(ref) for ref in ref_string.split('\r') if ref

1002

]))

1003

ref_lists = tuple(ref_lists)

1004

self._keys_by_offset[pos] = (key, absent, ref_lists, value)

1005

pos += len(line) + 1 # +1 for the \n

1006

if absent:

1007

continue

1008

if self.node_ref_lists:

1009

node_value = (value, ref_lists)

1010

else:

1011

node_value = value

1012

nodes.append((key, node_value))

1013

# print "parsed ", key

1014

return first_key, key, nodes, trailers

1015

1016

def _parsed_bytes(self, start, start_key, end, end_key):

1017

"""Mark the bytes from start to end as parsed.

1018

1019

Calling self._parsed_bytes(1,2) will mark one byte (the one at offset

1020

1) as parsed.

1021

1022

:param start: The start of the parsed region.

1023

:param end: The end of the parsed region.

1024

"""

1025

index = self._parsed_byte_index(start)

1026

new_value = (start, end)

1027

new_key = (start_key, end_key)

1028

if index == -1:

1029

# first range parsed is always the beginning.

1030

self._parsed_byte_map.insert(index, new_value)

1031

self._parsed_key_map.insert(index, new_key)

1032

return

1033

# four cases:

1034

# new region

1035

# extend lower region

1036

# extend higher region

1037

# combine two regions

1038

if (index + 1 < len(self._parsed_byte_map) and

1039

self._parsed_byte_map[index][1] == start and

1040

self._parsed_byte_map[index + 1][0] == end):

1041

# combine two regions

1042

self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],

1043

self._parsed_byte_map[index + 1][1])

1044

self._parsed_key_map[index] = (self._parsed_key_map[index][0],

1045

self._parsed_key_map[index + 1][1])

1046

del self._parsed_byte_map[index + 1]

1047

del self._parsed_key_map[index + 1]

1048

elif self._parsed_byte_map[index][1] == start:

1049

# extend the lower entry

1050

self._parsed_byte_map[index] = (

1051

self._parsed_byte_map[index][0], end)

1052

self._parsed_key_map[index] = (

1053

self._parsed_key_map[index][0], end_key)

1054

elif (index + 1 < len(self._parsed_byte_map) and

1055

self._parsed_byte_map[index + 1][0] == end):

1056

# extend the higher entry

1057

self._parsed_byte_map[index + 1] = (

1058

start, self._parsed_byte_map[index + 1][1])

1059

self._parsed_key_map[index + 1] = (

1060

start_key, self._parsed_key_map[index + 1][1])

1061

else:

1062

# new entry

1063

self._parsed_byte_map.insert(index + 1, new_value)

1064

self._parsed_key_map.insert(index + 1, new_key)

1065

1066

def _read_and_parse(self, readv_ranges):

1067

"""Read the the ranges and parse the resulting data.

1068

1069

:param readv_ranges: A prepared readv range list.

1070

"""

1071

if not readv_ranges:

1072

return

1073

if self._nodes is None and self._bytes_read * 2 >= self._size:

1074

# We've already read more than 50% of the file and we are about to

1075

# request more data, just _buffer_all() and be done

1076

self._buffer_all()

1077

return

1078

1079

readv_data = self._transport.readv(self._name, readv_ranges, True,

1080

self._size)

1081

# parse

1082

for offset, data in readv_data:

1083

self._bytes_read += len(data)

1084

if offset == 0 and len(data) == self._size:

1085

# We read the whole range, most likely because the

1086

# Transport upcast our readv ranges into one long request

1087

# for enough total data to grab the whole index.

1088

self._buffer_all(StringIO(data))

1089

return

1090

if self._bisect_nodes is None:

1091

# this must be the start

1092

if not (offset == 0):

1093

raise AssertionError()

1094

offset, data = self._parse_header_from_bytes(data)

1095

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

1096

self._parse_region(offset, data)

1097

1098

def _signature(self):
    """Return the signature that a file of this index type starts with."""
    return _SIGNATURE

1101

1102

def validate(self):
    """Validate that everything in the index can be accessed.

    iter_all_entries validates completely at the moment, so the index is
    validated by consuming every entry it yields.
    """
    for _ in self.iter_all_entries():
        pass

1107

1108

1109

class CombinedGraphIndex(object):

1110

"""A GraphIndex made up from smaller GraphIndices.

1111

1112

The backing indices must implement GraphIndex, and are presumed to be

1113

static data.

1114

1115

Queries against the combined index will be made against the first index,

1116

and then the second and so on. The order of index's can thus influence

1117

performance significantly. For example, if one index is on local disk and a

1118

second on a remote server, the local disk index should be before the other

1119

in the index list.

1120

"""

1121

1122

def __init__(self, indices, reload_func=None):

1123

"""Create a CombinedGraphIndex backed by indices.

1124

1125

:param indices: An ordered list of indices to query for data.

1126

:param reload_func: A function to call if we find we are missing an

1127

index. Should have the form reload_func() => True/False to indicate

1128

if reloading actually changed anything.

1129

"""

1130

self._indices = indices

1131

self._reload_func = reload_func

1132

1133

def __repr__(self):

1134

return "%s(%s)" % (

1135

self.__class__.__name__,

1136

', '.join(map(repr, self._indices)))

1137

1138

@symbol_versioning.deprecated_method(symbol_versioning.one_one)

1139

def get_parents(self, revision_ids):

1140

"""See graph._StackedParentsProvider.get_parents.

1141

1142

This implementation thunks the graph.Graph.get_parents api across to

1143

GraphIndex.

1144

1145

:param revision_ids: An iterable of graph keys for this graph.

1146

:return: A list of parent details for each key in revision_ids.

1147

Each parent details will be one of:

1148

* None when the key was missing

1149

* (NULL_REVISION,) when the key has no parents.

1150

* (parent_key, parent_key...) otherwise.

1151

"""

1152

parent_map = self.get_parent_map(revision_ids)

1153

return [parent_map.get(r, None) for r in revision_ids]

1154

1155

def get_parent_map(self, keys):

1156

"""See graph._StackedParentsProvider.get_parent_map"""

1157

search_keys = set(keys)

1158

if NULL_REVISION in search_keys:

1159

search_keys.discard(NULL_REVISION)

1160

found_parents = {NULL_REVISION:[]}

1161

else:

1162

found_parents = {}

1163

for index, key, value, refs in self.iter_entries(search_keys):

1164

parents = refs[0]

1165

if not parents:

1166

parents = (NULL_REVISION,)

1167

found_parents[key] = parents

1168

return found_parents

1169

1170

def insert_index(self, pos, index):

1171

"""Insert a new index in the list of indices to query.

1172

1173

:param pos: The position to insert the index.

1174

:param index: The index to insert.

1175

"""

1176

self._indices.insert(pos, index)

1177

1178

def iter_all_entries(self):

1179

"""Iterate over all keys within the index

1180

1181

Duplicate keys across child indices are presumed to have the same

1182

value and are only reported once.

1183

1184

:return: An iterable of (index, key, reference_lists, value).

1185

There is no defined order for the result iteration - it will be in

1186

the most efficient order for the index.

1187

"""

1188

seen_keys = set()

1189

while True:

1190

try:

1191

for index in self._indices:

1192

for node in index.iter_all_entries():

1193

if node[1] not in seen_keys:

1194

yield node

1195

seen_keys.add(node[1])

1196

return

1197

except errors.NoSuchFile:

1198

self._reload_or_raise()

1199

1200

def iter_entries(self, keys):

1201

"""Iterate over keys within the index.

1202

1203

Duplicate keys across child indices are presumed to have the same

1204

value and are only reported once.

1205

1206

:param keys: An iterable providing the keys to be retrieved.

1207

:return: An iterable of (index, key, reference_lists, value). There is no

1208

defined order for the result iteration - it will be in the most

1209

efficient order for the index.

1210

"""

1211

keys = set(keys)

1212

while True:

1213

try:

1214

for index in self._indices:

1215

if not keys:

1216

return

1217

for node in index.iter_entries(keys):

1218

keys.remove(node[1])

1219

yield node

1220

return

1221

except errors.NoSuchFile:

1222

self._reload_or_raise()

1223

1224

def iter_entries_prefix(self, keys):

1225

"""Iterate over keys within the index using prefix matching.

1226

1227

Duplicate keys across child indices are presumed to have the same

1228

value and are only reported once.

1229

1230

Prefix matching is applied within the tuple of a key, not to within

1231

the bytestring of each key element. e.g. if you have the keys ('foo',

1232

'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then

1233

only the former key is returned.

1234

1235

:param keys: An iterable providing the key prefixes to be retrieved.

1236

Each key prefix takes the form of a tuple the length of a key, but

1237

with the last N elements 'None' rather than a regular bytestring.

1238

The first element cannot be 'None'.

1239

:return: An iterable as per iter_all_entries, but restricted to the

1240

keys with a matching prefix to those supplied. No additional keys

1241

will be returned, and every match that is in the index will be

1242

returned.

1243

"""

1244

keys = set(keys)

1245

if not keys:

1246

return

1247

seen_keys = set()

1248

while True:

1249

try:

1250

for index in self._indices:

1251

for node in index.iter_entries_prefix(keys):

1252

if node[1] in seen_keys:

1253

continue

1254

seen_keys.add(node[1])

1255

yield node

1256

return

1257

except errors.NoSuchFile:

1258

self._reload_or_raise()

1259

1260

def key_count(self):

1261

"""Return an estimate of the number of keys in this index.

1262

1263

For CombinedGraphIndex this is approximated by the sum of the keys of

1264

the child indices. As child indices may have duplicate keys this can

1265

have a maximum error of the number of child indices * largest number of

1266

keys in any index.

1267

"""

1268

while True:

1269

try:

1270

return sum((index.key_count() for index in self._indices), 0)

1271

except errors.NoSuchFile:

1272

self._reload_or_raise()

1273

1274

def _reload_or_raise(self):

1275

"""We just got a NoSuchFile exception.

1276

1277

Try to reload the indices, if it fails, just raise the current

1278

exception.

1279

"""

1280

if self._reload_func is None:

1281

raise

1282

exc_type, exc_value, exc_traceback = sys.exc_info()

1283

trace.mutter('Trying to reload after getting exception: %s',

1284

exc_value)

1285

if not self._reload_func():

1286

# We tried to reload, but nothing changed, so we fail anyway

1287

trace.mutter('_reload_func indicated nothing has changed.'

1288

' Raising original exception.')

1289

raise exc_type, exc_value, exc_traceback

1290

1291

def validate(self):

1292

"""Validate that everything in the index can be accessed."""

1293

while True:

1294

try:

1295

for index in self._indices:

1296

index.validate()

1297

return

1298

except errors.NoSuchFile:

1299

self._reload_or_raise()

1300

1301

1302

class InMemoryGraphIndex(GraphIndexBuilder):
    """A GraphIndex which operates entirely out of memory and is mutable.

    This is designed to allow the accumulation of GraphIndex entries during a
    single write operation, where the accumulated entries need to be immediately
    available - for example via a CombinedGraphIndex.
    """

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when this index has no reference
            lists.
        """
        if self.reference_lists:
            for (key, value, node_refs) in nodes:
                self.add_node(key, value, node_refs)
        else:
            for (key, value) in nodes:
                self.add_node(key, value)

    def iter_all_entries(self):
        """Iterate over all keys within the index

        :return: An iterable of (index, key, value, reference_lists), or of
            (index, key, value) when this index has no reference lists.
            There is no defined order for the result iteration - it will be in
            the most efficient order for the index (in this case dictionary
            hash order).
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.iteritems():
                if not absent:
                    yield self, key, value

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists), or of
            (index, key, value) when this index has no reference lists.
            There is no defined order for the result iteration - it will be in
            the most efficient order for the index (keys iteration order in
            this case).
        """
        keys = set(keys)
        if self.reference_lists:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2], node[1]
        else:
            for key in keys.intersection(self._keys):
                node = self._nodes[key]
                if not node[0]:
                    yield self, key, node[2]

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        :raises BadIndexKey: If a prefix starts with None or is not the
            length of a key.
        """
        # XXX: Too much duplication with the GraphIndex class; consider finding
        # a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            # With single-element keys a prefix is a complete key, so this
            # is a plain lookup.
            for key in keys:
                # sanity check
                if key[0] is None:
                    raise errors.BadIndexKey(key)
                if len(key) != self._key_length:
                    raise errors.BadIndexKey(key)
                node = self._nodes.get(key)
                if node is None:
                    # a non-existent lookup: nothing matches, which is not
                    # an error (the multi-element path below does the same).
                    continue
                if node[0]:
                    # absent node
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for key in keys:
            # sanity check
            if key[0] is None:
                raise errors.BadIndexKey(key)
            if len(key) != self._key_length:
                raise errors.BadIndexKey(key)
            # find what it refers to:
            key_dict = nodes_by_key
            elements = list(key)
            # find the subdict to return
            try:
                while len(elements) and elements[0] is not None:
                    key_dict = key_dict[elements[0]]
                    elements.pop(0)
            except KeyError:
                # a non-existent lookup.
                continue
            if len(elements):
                # a prefix was given: walk every dict below the match
                dicts = [key_dict]
                while dicts:
                    key_dict = dicts.pop(-1)
                    # can't be empty or would not exist
                    value = key_dict.itervalues().next()
                    if isinstance(value, dict):
                        # push keys
                        dicts.extend(key_dict.itervalues())
                    else:
                        # yield keys
                        for value in key_dict.itervalues():
                            yield (self, ) + value
            else:
                # every element was given, so key_dict is the node itself
                yield (self, ) + key_dict

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._keys)

    def validate(self):
        """In memory indices have no known corruption at the moment."""

1441

1442

1443

class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
        add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are passed on to.
        :param prefix: The tuple of key elements to prepend to every key.
        :param missing_key_length: How many None elements to append to prefix
            to form the prefix key that iter_all_entries searches for.
        :param add_nodes_callback: Called by add_nodes with the list of
            prefixed nodes.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,)*missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or of (key, value) entries when there are no reference lists.
        """
        # save nodes in case its an iterator
        nodes = tuple(nodes)
        try:
            # Add prefix to each reference. node_refs is a tuple of tuples,
            # so split it apart, and add prefix to the internal reference
            translated_nodes = [
                (self.prefix + key, value,
                 tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                       for ref_list in node_refs))
                for (key, value, node_refs) in nodes]
        except ValueError:
            # Unpacking failed, so the nodes carry no reference lists.
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            translated_nodes = [(self.prefix + key, value)
                                for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of nodes from the adapted index, either
            (index, key, value, reference_lists) or (index, key, value).
        :return: An iterable of the same nodes with the prefix removed from
            the key and from every reference.
        :raises BadIndexData: If a key or reference does not start with the
            prefix.
        """
        prefix = self.prefix
        prefix_len = self.prefix_len
        for node in an_iter:
            # cross checks
            if node[1][:prefix_len] != prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][prefix_len:]
            if len(node) == 3:
                # the adapted index has no reference lists, so there are no
                # references to check or strip.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:prefix_len] != prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[prefix_len:] for ref_node in ref_list)
                for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists). There is
            no defined order for the result iteration - it will be in the most
            efficient order for the adapted index.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists). There is no
            defined order for the result iteration - it will be in the most
            efficient order for the adapted index.
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # count without building a list of every node
        return sum(1 for _ in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()

Older »