~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2005-07-20 17:30:51 UTC
Revision ID: mbp@sourcefrog.net-20050720173051-e413a67bd9d79701

- more buffering when reading/writing hashcache

files added:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/emacs

contrib/emacs/bzr-mode.el

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revisionspec.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/tests/test_weave.py => tools/testweave.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwk

contrib/zsh/_bzr

elementtree/ElementTree.py

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains: (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (i.e., the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

errors,

osutils,

patiencediff,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.osutils import (

100

contains_whitespace,

101

contains_linebreaks,

102

sha_string,

103

sha_strings,

104

split_lines,

105

)

106

from bzrlib.versionedfile import (

107

AbsentContentFactory,

108

adapter_registry,

109

ConstantMapper,

110

ContentFactory,

111

FulltextContentFactory,

112

VersionedFile,

113

VersionedFiles,

114

)

115

116

117

# TODO: Split out code specific to this format into an associated object.

118

119

# TODO: Can we put in some kind of value to check that the index and data

120

# files belong together?

121

122

# TODO: accommodate binaries, perhaps by storing a byte count

123

124

# TODO: function to check whole file

125

126

# TODO: atomically append data, then measure backwards from the cursor

127

# position after writing to work out where it was located. we may need to

128

# bypass python file buffering.

129

130

DATA_SUFFIX = '.knit'

131

INDEX_SUFFIX = '.kndx'

132

133

134

class KnitAdapter(object):
    """Common state shared by the knit record adapters.

    The subclasses in this module each provide get_bytes(), which converts
    the raw bytes of one knit record into another representation.
    """

    def __init__(self, basis_vf):
        """Set up the helpers used to decode and re-encode knit records.

        :param basis_vf: A versioned file to read the basis texts of deltas
            from.  May be None for adapters that never need a basis text.
        """
        self._basis_vf = basis_vf
        # Built without an index or data access: the adapters only call its
        # record parsing/serialising helpers.
        self._data = KnitVersionedFiles(None, None)
        self._annotate_factory = KnitAnnotateFactory()
        self._plain_factory = KnitPlainFactory()

147

148

149

class FTAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated fulltext record into a plain one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the record re-serialised without annotations.

        :param factory: The content factory the bytes came from (not used by
            this adapter).
        :param annotated_compressed_bytes: Raw bytes of an annotated fulltext
            knit record.
        :return: The bytes produced by _record_to_data for the same version
            id and digest, with the origin stripped from every line.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        # header[1] is the version id; header[3] is handed on as the digest.
        parsed = self._annotate_factory.parse_fulltext(annotated_lines,
            header[1])
        _, plain_record = self._data._record_to_data(
            (header[1],), header[3], parsed.text())
        return plain_record

158

159

160

class DeltaAnnotatedToUnannotated(KnitAdapter):
    """Adapter turning an annotated line-delta record into a plain one."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the delta record re-serialised without annotations.

        :param factory: The content factory the bytes came from (not used by
            this adapter).
        :param annotated_compressed_bytes: Raw bytes of an annotated
            line-delta knit record.
        :return: The bytes produced by _record_to_data for the same version
            id and digest, holding the unannotated delta.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        # plain=True makes the parser drop the origin of every delta line.
        hunks = self._annotate_factory.parse_line_delta(
            annotated_lines, header[1], plain=True)
        plain_lines = self._plain_factory.lower_line_delta(hunks)
        _, plain_record = self._data._record_to_data(
            (header[1],), header[3], plain_lines)
        return plain_record

171

172

173

class FTAnnotatedToFullText(KnitAdapter):
    """Adapter turning an annotated fulltext record into the full text."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the text stored in an annotated fulltext record.

        :param factory: The content factory the bytes came from; its key and
            _build_details are used to parse the record.
        :param annotated_compressed_bytes: Raw bytes of an annotated fulltext
            knit record.
        :return: The text as a single string.
        """
        _, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        # A fulltext needs no basis, hence the trailing None.
        content, _ = self._annotate_factory.parse_record(
            factory.key[-1], annotated_lines, factory._build_details, None)
        return ''.join(content.text())

182

183

184

class DeltaAnnotatedToFullText(KnitAdapter):
    """Adapter turning an annotated line-delta record into the full text."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        """Return the full text obtained by applying the delta to its basis.

        :param factory: The content factory the bytes came from; its first
            parent is used as the compression parent and _build_details[1]
            as the strip-final-newline flag.
        :param annotated_compressed_bytes: Raw bytes of an annotated
            line-delta knit record.
        :return: The text as a single string.
        :raises RevisionNotPresent: If the basis versioned file reports the
            compression parent as absent.
        """
        header, annotated_lines = self._data._parse_record_unchecked(
            annotated_compressed_bytes)
        hunks = self._annotate_factory.parse_line_delta(
            annotated_lines, header[1], plain=True)
        basis_key = factory.parents[0]
        stream = self._basis_vf.get_record_stream(
            [basis_key], 'unordered', True)
        basis_record = stream.next()
        if basis_record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(basis_key, self._basis_vf)
        # The delta was parsed as plain lines, so it is applied by hand to a
        # plain content object rather than through the annotate factory.
        text = PlainKnitContent(
            split_lines(basis_record.get_bytes_as('fulltext')), basis_key)
        text.apply_delta(hunks, header[1])
        text._should_strip_eol = factory._build_details[1]
        return ''.join(text.text())

204

205

206

class FTPlainToFullText(KnitAdapter):
    """Adapter turning a plain (unannotated) fulltext record into text."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the text stored in a plain fulltext record.

        :param factory: The content factory the bytes came from; its key and
            _build_details are used to parse the record.
        :param compressed_bytes: Raw bytes of a plain fulltext knit record.
        :return: The text as a single string.
        """
        _, record_lines = self._data._parse_record_unchecked(
            compressed_bytes)
        # A fulltext needs no basis, hence the trailing None.
        content, _ = self._plain_factory.parse_record(
            factory.key[-1], record_lines, factory._build_details, None)
        return ''.join(content.text())

215

216

217

class DeltaPlainToFullText(KnitAdapter):
    """Adapter turning a plain (unannotated) line-delta record into text."""

    def get_bytes(self, factory, compressed_bytes):
        """Return the full text obtained by applying the delta to its basis.

        :param factory: The content factory the bytes came from; its first
            parent is used as the compression parent and its _build_details
            drive the parsing of the record.
        :param compressed_bytes: Raw bytes of a plain line-delta knit record.
        :return: The text as a single string.
        :raises RevisionNotPresent: If the basis versioned file reports the
            compression parent as absent.
        """
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        compression_parent = factory.parents[0]
        # XXX: string splitting overhead.
        basis_entry = self._basis_vf.get_record_stream(
            [compression_parent], 'unordered', True).next()
        if basis_entry.storage_kind == 'absent':
            raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
        basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # parse_record parses the delta itself and applies it to a copy of
        # the basis, so the delta is not parsed separately beforehand.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())

237

238

239

class KnitContentFactory(ContentFactory):
    """ContentFactory that hands out records streamed from a knit.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, build_details, sha1, raw_record,
        annotated, knit=None):
        """Describe one knit record.

        :param key: The key.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details; element 0 selects delta versus fulltext.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        :param knit: Optional object used to serve 'fulltext' requests
            through its get_text method.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = key
        self.parents = parents
        kind = 'ft'
        if build_details[0] == 'line-delta':
            kind = 'delta'
        annotated_kind = ''
        if annotated:
            annotated_kind = 'annotated-'
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._build_details = build_details
        self._knit = knit

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        The native storage kind yields the raw record; 'fulltext' is served
        through the knit given at construction time, when there is one.

        :raises UnavailableRepresentation: For any other request.
        """
        if storage_kind == self.storage_kind:
            return self._raw_record
        if storage_kind != 'fulltext' or self._knit is None:
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
        return self._knit.get_text(self.key[0])

282

283

284

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The lines are held in memory with a trailing \\n on every line, together
    with a flag recording whether the final newline is really part of the
    text, because that mirrors the on-disk knit representation.
    """

    def __init__(self):
        # Set to True when text() must drop the last line's newline.
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        Must be provided by subclasses.
        """
        raise NotImplementedError(self.apply_delta)

    def line_delta_iter(self, new_lines):
        """Yield the line-based delta from this content to new_lines.

        :param new_lines: The target content object.
        :return: An iterator of (old_start, old_end, new_length, new_lines)
            hunks, one for every non-equal opcode of the patience matcher.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = patiencediff.PatienceSequenceMatcher(
            None, source_texts, target_texts)
        for tag, old_start, old_end, new_start, new_end in \
                matcher.get_opcodes():
            if tag != 'equal':
                yield (old_start, old_end, new_end - new_start,
                       new_lines._lines[new_start:new_end])

    def line_delta(self, new_lines):
        """Return line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

        :param knit_delta: Iterable of (source_start, source_end,
            target_length, new_text) hunks.
        :param source: The lines the delta applies to.
        :param target: The lines the delta produces.
        :return: An iterator of (source_pos, target_pos, length) blocks,
            ending with a zero-length block.
        """
        def matching_length(source_at, target_at, length):
            # knit deltas do not provide reliable info about whether the
            # last line of a run matches, due to eol handling, so the last
            # line is compared explicitly.
            if (length > 0 and source[source_at + length - 1] !=
                    target[target_at + length - 1]):
                return length - 1
            return length

        total = len(target)
        source_at = 0
        target_at = 0
        for hunk_start, hunk_end, inserted, _unused_text in knit_delta:
            gap = hunk_start - source_at
            matched = matching_length(source_at, target_at, gap)
            if matched > 0:
                yield source_at, target_at, matched
            target_at += inserted + gap
            source_at = hunk_end
        tail = total - target_at
        matched = matching_length(source_at, target_at, tail)
        if matched > 0:
            yield source_at, target_at, matched
        yield source_at + tail, total, 0

338

339

340

class AnnotatedKnitContent(KnitContent):

341

"""Annotated content."""

342

343

def __init__(self, lines):

344

KnitContent.__init__(self)

345

self._lines = lines

346

347

def annotate(self):

348

"""Return a list of (origin, text) for each content line."""

349

lines = self._lines[:]

350

if self._should_strip_eol:

351

origin, last_line = lines[-1]

352

lines[-1] = (origin, last_line.rstrip('\n'))

353

return lines

354

355

def apply_delta(self, delta, new_version_id):

356

"""Apply delta to this object to become new_version_id."""

357

offset = 0

358

lines = self._lines

359

for start, end, count, delta_lines in delta:

360

lines[offset+start:offset+end] = delta_lines

361

offset = offset + (start - end) + count

362

363

def text(self):

364

try:

365

lines = [text for origin, text in self._lines]

366

except ValueError, e:

367

# most commonly (only?) caused by the internal form of the knit

368

# missing annotation information because of a bug - see thread

369

# around 20071015

370

raise KnitCorrupt(self,

371

"line in annotated knit missing annotation information: %s"

372

% (e,))

373

if self._should_strip_eol:

374

lines[-1] = lines[-1].rstrip('\n')

375

return lines

376

377

def copy(self):

378

return AnnotatedKnitContent(self._lines[:])

379

380

381

class PlainKnitContent(KnitContent):
    """Knit content stored as bare text lines plus a single version id.

    annotate() reports that one version id for every line, so annotating a
    PlainKnitContent is generally not useful.
    """

    def __init__(self, lines, version_id):
        """Wrap lines (a list of text lines) belonging to version_id."""
        KnitContent.__init__(self)
        self._version_id = version_id
        self._lines = lines

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, text) for text in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id.

        The stored line list is edited in place and the recorded version id
        is replaced by new_version_id.
        """
        content = self._lines
        # Running difference between original positions and current ones,
        # caused by the hunks applied so far.
        shift = 0
        for hunk_start, hunk_end, added, replacement in delta:
            content[shift + hunk_start:shift + hunk_end] = replacement
            shift += (hunk_start - hunk_end) + added
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        return PlainKnitContent(list(self._lines), self._version_id)

    def text(self):
        """Return the text lines.

        Without the strip-eol flag the stored list itself is returned;
        otherwise a copy whose last line has its newline removed.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

416

417

418

class _KnitFactory(object):

419

"""Base class for common Factory functions."""

420

421

def parse_record(self, version_id, record, record_details,

422

base_content, copy_base_content=True):

423

"""Parse a record into a full content object.

424

425

:param version_id: The official version id for this content

426

:param record: The data returned by read_records_iter()

427

:param record_details: Details about the record returned by

428

get_build_details

429

:param base_content: If get_build_details returns a compression_parent,

430

you must return a base_content here, else use None

431

:param copy_base_content: When building from the base_content, decide

432

you can either copy it and return a new object, or modify it in

433

place.

434

:return: (content, delta) A Content object and possibly a line-delta,

435

delta may be None

436

"""

437

method, noeol = record_details

438

if method == 'line-delta':

439

if copy_base_content:

440

content = base_content.copy()

441

else:

442

content = base_content

443

delta = self.parse_line_delta(record, version_id)

444

content.apply_delta(delta, version_id)

445

else:

446

content = self.parse_fulltext(record, version_id)

447

delta = None

448

content._should_strip_eol = noeol

449

return (content, delta)

450

451

452

class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return content in which every line originates from version_id."""
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of a serialised line delta.

        :param lines: The serialised delta lines.
        :param version_id: Passed through to parse_line_delta, which does
            not use it; optional so that callers supplying only the lines
            keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id, plain=False):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param version_id: Unused; accepted for signature compatibility
            with the plain factory.
        :param plain: If True, the lines are returned as a plain
            list without annotations, not as a list of (origin, content)
            tuples, i.e.
            (start, end, count, [1..count newline])
        """
        result = []
        lines = iter(lines)
        next_line = lines.next

        # walk through the lines parsing.
        # Note that the plain test is explicitly pulled out of the
        # loop to minimise any performance impact
        if plain:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [next_line().split(' ', 1)[1]
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        else:
            for header in lines:
                start, end, count = [int(n) for n in header.split(',')]
                contents = [tuple(next_line().split(' ', 1))
                            for i in xrange(count)]
                result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        next_line = lines.next
        for header in lines:
            count = int(header.split(',')[2])
            for i in xrange(count):
                origin, text = next_line().split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate(self, knit, key):
        """Return the (origin, line) annotation of key read from knit.

        For tuple keys every origin is expanded to a full key by prepending
        the prefix of key (all elements but the last).
        """
        content = knit._get_content(key)
        # adjust for the fact that serialised annotations are only key
        # suffixes for this factory.
        if type(key) == tuple:
            prefix = key[:-1]
            return [(prefix + (origin,), line)
                    for origin, line in content.annotate()]
        else:
            # XXX: This smells a bit.  Why would key ever be a non-tuple
            # here?  Aren't keys defined to be tuples?  -- spiv 20080618
            return content.annotate()

574

575

576

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain (unannotated) Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return a PlainKnitContent for lines, tagged with version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """Parse an unannotated fulltext.

        This is not a no-op: the lines are wrapped in a content object that
        remembers version_id as the origin of every line.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, new_lines) for each hunk in lines.

        :param lines: The serialised delta as an indexable sequence; each
            hunk is a 'start,end,count' header followed by count lines.
        :param version_id: Not used.
        """
        position = 0
        total = len(lines)
        while position < total:
            start, end, count = [int(field)
                                 for field in lines[position].split(',')]
            body_start = position + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            position = body_end

    def parse_line_delta(self, lines, version_id):
        """Return parse_line_delta_iter(lines, version_id) as a list."""
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        The hunk headers are consumed and only the content lines that
        follow them are yielded.
        """
        line_iter = iter(lines)
        fetch = line_iter.next
        for header in line_iter:
            fields = header.split(',')
            for _unused in xrange(int(fields[2])):
                yield fetch()

    def lower_fulltext(self, content):
        """Return the text lines of content; see parse_fulltext."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialise delta into header and content lines.

        See parse_line_delta which this inverts.
        """
        serialised = []
        for start, end, count, new_lines in delta:
            serialised.append('%d,%d,%d\n' % (start, end, count))
            serialised.extend(new_lines)
        return serialised

    def annotate(self, knit, key):
        """Annotate key by delegating to a _KnitAnnotator over knit."""
        return _KnitAnnotator(knit).annotate(key)

636

637

638

639

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles backed by a _KndxIndex and a _KnitKeyAccess on
        that transport.
    """
    def factory(transport):
        kndx_index = _KndxIndex(
            transport, mapper, lambda:None, lambda:True, lambda:True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access,
                                  annotated=annotated)
    return factory

653

654

655

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport.  It opens the write stream
        'newpack' on the transport, begins a pack container on it and
        returns a KnitVersionedFiles carrying that stream and container
        writer as its ``stream`` and ``writer`` attributes.
    """
    def factory(transport):
        store_parents = graph or delta
        # One reference list for the graph and one more for the deltas.
        reference_lists = 0
        if graph:
            reference_lists += 1
        if delta:
            reference_lists += 1
        # Without delta compression no delta chain is permitted.
        chain_limit = 0
        if delta:
            chain_limit = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=reference_lists, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        knit_index = _KnitGraphIndex(graph_index, lambda:True,
            parents=store_parents, deltas=delta,
            add_callback=graph_index.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(knit_index, pack_access,
            max_delta_chain=chain_limit)
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

690

691

692

def cleanup_pack_knit(versioned_files):
    """Finish the pack container and close the stream of a pack based knit.

    :param versioned_files: An object carrying ``writer`` and ``stream``
        attributes, as set up by the factory from make_pack_factory.

    The container writer there is built around ``stream.write``, so it is
    ended while the stream is still open; the stream is closed afterwards,
    even if ending the writer fails.
    """
    try:
        versioned_files.writer.end()
    finally:
        versioned_files.stream.close()

695

696

697

class KnitVersionedFiles(VersionedFiles):

698

"""Storage for many versioned files using knit compression.

699

700

Backend storage is managed by indices and data objects.

701

702

:ivar _index: A _KnitGraphIndex or similar that can describe the

703

parents, graph, compression and data location of entries in this

704

KnitVersionedFiles. Note that this is only the index for

705

*this* vfs; if there are fallbacks they must be queried separately.

706

"""

707

708

def __init__(self, index, data_access, max_delta_chain=200,
             annotated=False):
    """Create a KnitVersionedFiles with index and data_access.

    :param index: The index for the knit data.
    :param data_access: The access object to store and retrieve knit
        records.
    :param max_delta_chain: The maximum number of deltas to permit during
        insertion. Set to 0 to prohibit the use of deltas.
    :param annotated: Set to True to cause annotations to be calculated and
        stored during insertion.
    """
    # The content factory decides between annotated and plain storage.
    if annotated:
        content_factory = KnitAnnotateFactory()
    else:
        content_factory = KnitPlainFactory()
    self._index = index
    self._access = data_access
    self._max_delta_chain = max_delta_chain
    self._factory = content_factory
    # No fallbacks until add_fallback_versioned_files is called.
    self._fallback_vfs = []

728

729

def __repr__(self):

730

return "%s(%r, %r)" % (

731

self.__class__.__name__,

732

self._index,

733

self._access)

734

735

def add_fallback_versioned_files(self, a_versioned_files):
    """Register another source for texts that are not in this knit.

    :param a_versioned_files: A VersionedFiles object.  It is appended to
        the list of fallbacks.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

741

742

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines().

    The index is asked whether writing is allowed and the key and lines
    are validated before the text is handed to _add.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so they get an empty tuple.
    if parents is None:
        effective_parents = ()
    else:
        effective_parents = parents
    return self._add(key, lines, effective_parents, parent_texts,
                     left_matching_blocks, nostore_sha, random_id)

755

756

def _add(self, key, lines, parents, parent_texts,
         left_matching_blocks, nostore_sha, random_id):
    """Add a set of lines on top of version specified by parents.

    Any versions not present will be converted into ghosts.

    :return: (digest, text_length, content): the sha of the joined lines,
        the length of the joined lines as passed in, and the content
        object built for them.
    :raises ExistingContent: If the digest equals nostore_sha.
    :raises TypeError: If an element of key is not a str.
    """
    # first thing, if the content is something we don't need to store,
    # find that out.
    text = ''.join(lines)
    digest = sha_string(text)
    if digest == nostore_sha:
        raise errors.ExistingContent()

    if parent_texts is None:
        parent_texts = {}
    # Do a single query to ascertain parent presence.
    known_parents = self.get_parent_map(parents)
    present_parents = [parent for parent in parents
                       if parent in known_parents]

    # Currently we can only compress against the left most present parent.
    if present_parents and present_parents[0] == parents[0]:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        use_delta = self._check_should_delta(present_parents[0])
    else:
        use_delta = False

    # Measured before a missing final newline is appended below.
    text_length = len(text)
    options = []
    if lines and lines[-1][-1] != '\n':
        # Work on a copy so the caller's list is left alone.
        lines = lines[:]
        lines[-1] = lines[-1] + '\n'
        text += '\n'
        options.append('no-eol')

    for element in key:
        if type(element) != str:
            raise TypeError("key contains non-strings: %r" % (key,))
    # Knit hunks are still last-element only
    version_id = key[-1]
    content = self._factory.make(lines, version_id)
    if 'no-eol' in options:
        # Hint to the content object that its text() call should strip the
        # EOL.
        content._should_strip_eol = True
    if use_delta or (self._factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, use_delta, self._factory.annotated,
            left_matching_blocks)

    if use_delta:
        options.append('line-delta')
        stored = self._factory.lower_line_delta(delta_hunks)
        size, record = self._record_to_data(key, digest, stored)
    elif self._factory.__class__ == KnitPlainFactory:
        # isinstance is slower and we have no hierarchy.
        options.append('fulltext')
        # Use the already joined bytes saving iteration time in
        # _record_to_data.
        size, record = self._record_to_data(key, digest, lines, [text])
    else:
        options.append('fulltext')
        # get mixed annotation + content and feed it into the
        # serialiser.
        stored = self._factory.lower_fulltext(content)
        size, record = self._record_to_data(key, digest, stored)

    access_memo = self._access.add_raw_records([(key, size)], record)[0]
    self._index.add_records(
        ((key, options, access_memo, parents),),
        random_id=random_id)
    return digest, text_length, content

839

840

def annotate(self, key):
    """See VersionedFiles.annotate.

    The work is delegated to this knit's content factory.
    """
    content_factory = self._factory
    return content_factory.annotate(self, key)

843

844

def check(self, progress_bar=None):
    """See VersionedFiles.check().

    :param progress_bar: Accepted but not used here.
    :raises KnitCorrupt: If a non-fulltext record has a first parent that
        is missing from the parent map.
    """
    # This doesn't actually test extraction of everything, but that will
    # impact 'bzr check' substantially, and needs to be integrated with
    # care. However, it does check for the obvious problem of a delta with
    # no basis.
    all_keys = self._index.keys()
    parent_map = self.get_parent_map(all_keys)
    for key in all_keys:
        if self._index.get_method(key) == 'fulltext':
            continue
        basis = parent_map[key][0]
        if basis in parent_map:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            basis, key))
    # Every fallback gets the same treatment.
    for fallback in self._fallback_vfs:
        fallback.check()

861

862

def _check_add(self, key, lines, random_id, check_content):
    """Check that the version id of key and the lines are safe to add.

    :param key: The key being added; only its last element is examined.
    :param lines: The lines being added.
    :param random_id: Not used by the checks.
    :param check_content: When false the checks on lines are skipped.
    :raises InvalidRevisionId: If the version id contains whitespace.
    """
    version_id = key[-1]
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

875

876

def _check_header(self, key, line):

877

rec = self._split_header(line)

878

self._check_header_version(rec, key[-1])

879

return rec

880

881

def _check_header_version(self, rec, version_id):

882

"""Checks the header version on original format knit records.

883

884

These have the last component of the key embedded in the record.

885

"""

886

if rec[1] != version_id:

887

raise KnitCorrupt(self,

888

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

889

890

def _check_should_delta(self, parent):

891

"""Iterate back through the parent listing, looking for a fulltext.

892

893

This is used when we want to decide whether to add a delta or a new

894

fulltext. It searches for _max_delta_chain parents. When it finds a

895

fulltext parent, it sees if the total size of the deltas leading up to

896

it is large enough to indicate that we want a new full text anyway.

897

898

Return True if we should create a new delta, False if we should use a

899

full text.

900

"""

901

delta_size = 0

902

fulltext_size = None

903

for count in xrange(self._max_delta_chain):

904

# XXX: Collapse these two queries:

905

try:

906

# Note that this only looks in the index of this particular

907

# KnitVersionedFiles, not in the fallbacks. This ensures that

908

# we won't store a delta spanning physical repository

909

# boundaries.

910

method = self._index.get_method(parent)

911

except RevisionNotPresent:

912

# Some basis is not locally present: always delta

913

return False

914

index, pos, size = self._index.get_position(parent)

915

if method == 'fulltext':

916

fulltext_size = size

917

break

918

delta_size += size

919

# We don't explicitly check for presence because this is in an

920

# inner loop, and if it's missing it'll fail anyhow.

921

# TODO: This should be asking for compression parent, not graph

922

# parent.

923

parent = self._index.get_parent_map([parent])[parent][0]

924

else:

925

# We couldn't find a fulltext, so we must create a new one

926

return False

927

# Simple heuristic - if the total I/O wold be greater as a delta than

928

# the originally installed fulltext, we create a new fulltext.

929

return fulltext_size > delta_size

930

931

def _build_details_to_components(self, build_details):

932

"""Convert a build_details tuple to a position tuple."""

933

# record_details, access_memo, compression_parent

934

return build_details[3], build_details[0], build_details[1]

935

936

def _get_components_positions(self, keys, allow_missing=False):

937

"""Produce a map of position data for the components of keys.

938

939

This data is intended to be used for retrieving the knit records.

940

941

A dict of key to (record_details, index_memo, next, parents) is

942

returned.

943

method is the way referenced data should be applied.

944

index_memo is the handle to pass to the data access to actually get the

945

data

946

next is the build-parent of the version, or None for fulltexts.

947

parents is the version_ids of the parents of this version

948

949

:param allow_missing: If True do not raise an error on a missing component,

950

just ignore it.

951

"""

952

component_data = {}

953

pending_components = keys

954

while pending_components:

955

build_details = self._index.get_build_details(pending_components)

956

current_components = set(pending_components)

957

pending_components = set()

958

for key, details in build_details.iteritems():

959

(index_memo, compression_parent, parents,

960

record_details) = details

961

method = record_details[0]

962

if compression_parent is not None:

963

pending_components.add(compression_parent)

964

component_data[key] = self._build_details_to_components(details)

965

missing = current_components.difference(build_details)

966

if missing and not allow_missing:

967

raise errors.RevisionNotPresent(missing.pop(), self)

968

return component_data

969

970

def _get_content(self, key, parent_texts={}):
    """Return the content object that makes up the version named by key.

    :param key: The key of the version wanted.
    :param parent_texts: A dict of key to already-built content; when key
        is in it that object is returned instead of being rebuilt.  It is
        only read here, never modified.
    :raises RevisionNotPresent: If key is in parent_texts but
        get_parent_map() does not report it.
    """
    cached_version = parent_texts.get(key, None)
    if cached_version is None:
        _, contents_map = self._get_content_maps([key])
        return contents_map[key]
    # Ensure the cache dict is valid.
    if self.get_parent_map([key]):
        return cached_version
    raise RevisionNotPresent(key, self)

981

982

def _get_content_maps(self, keys, nonlocal_keys=None):
    """Build the texts and KnitContent objects for keys.

    :param keys: The keys to produce content maps for.
    :param nonlocal_keys: An iterable of keys (possibly intersecting keys)
        which are known to not be in this knit, but rather in one of the
        fallback knits.
    :return: (text_map, content_map) where text_map contains the texts for
        the requested versions and content_map contains the KnitContents.
    :raises RevisionNotPresent: If a component of a local key has no entry
        in the record map.
    :raises KnitCorrupt: If the sha-1 of a reconstructed text differs from
        the digest stored with its record.
    """
    # FUTURE: This function could be improved for the 'extract many' case
    # by tracking each component and only doing the copy when the number of
    # children that need to apply deltas to it is > 1 or it is part of the
    # final output.
    keys = list(keys)
    multiple_versions = len(keys) != 1
    record_map = self._get_record_map(keys, allow_missing=True)

    if nonlocal_keys is None:
        nonlocal_keys = set()
    else:
        nonlocal_keys = frozenset(nonlocal_keys)
    text_map = {}
    content_map = {}
    final_content = {}

    # Keys held by a fallback are fetched from it as fulltexts.
    still_missing = set(nonlocal_keys)
    for fallback in self._fallback_vfs:
        if not still_missing:
            break
        stream = fallback.get_record_stream(still_missing, 'unordered', True)
        for record in stream:
            if record.storage_kind == 'absent':
                continue
            still_missing.remove(record.key)
            lines = split_lines(record.get_bytes_as('fulltext'))
            text_map[record.key] = lines
            content_map[record.key] = PlainKnitContent(lines, record.key)
            if record.key in keys:
                final_content[record.key] = content_map[record.key]

    for key in keys:
        if key in nonlocal_keys:
            # already handled
            continue
        # Plan: walk back along the build parents until a fulltext (build
        # parent of None) or some already-built content is reached.
        components = []
        cursor = key
        while cursor is not None:
            try:
                record, record_details, digest, build_parent = \
                    record_map[cursor]
            except KeyError:
                raise RevisionNotPresent(cursor, self)
            components.append((cursor, record, record_details, digest))
            cursor = build_parent
            if cursor in content_map:
                # no need to plan further back
                components.append((cursor, None, None, None))
                break

        # Build: apply the components oldest first.
        content = None
        for component in reversed(components):
            component_id, record, record_details, digest = component
            if component_id in content_map:
                content = content_map[component_id]
                continue
            content, delta = self._factory.parse_record(
                key[-1], record, record_details, content,
                copy_base_content=multiple_versions)
            if multiple_versions:
                content_map[component_id] = content

        final_content[key] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        actual_sha = sha_strings(text)
        if actual_sha != digest:
            message = ('\n sha-1 %s'
                       '\n of reconstructed text does not match'
                       '\n expected %s'
                       '\n for version %s' %
                       (actual_sha, digest, key))
            raise KnitCorrupt(self, message)
        text_map[key] = text
    return text_map, final_content

1065

1066

def get_parent_map(self, keys):
    """Look up the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    map_and_sources = self._get_parent_map_with_sources(keys)
    # Only the combined map is wanted; the per-source breakdown is dropped.
    return map_and_sources[0]

1074

1075

def _get_parent_map_with_sources(self, keys):
    """Look up the parents of keys, remembering where each was found.

    The index of this knit is asked first, then each fallback in order;
    a source is only asked for the keys no earlier source knew about, and
    the search stops as soon as nothing is left to find.

    :param keys: The keys to look up parents for.
    :return: A tuple. The first element is a mapping from keys to parents.
        Absent keys are absent from the mapping. The second element is a
        list of the parent maps returned by each source that was queried:
        first the one from this knit, then the first fallback source, and
        so on.
    """
    sources = [self._index] + self._fallback_vfs
    still_missing = set(keys)
    combined = {}
    per_source = []
    for source in sources:
        if not still_missing:
            break
        found = source.get_parent_map(still_missing)
        per_source.append(found)
        combined.update(found)
        still_missing -= set(found)
    return combined, per_source

1097

1098

def _get_record_map(self, keys, allow_missing=False):
    """Read the knit records needed for keys into a dictionary.

    :return: {key:(record, record_details, digest, next)}
        record
            data returned from read_records
        record_details
            opaque information to pass to parse_record
        digest
            SHA1 digest of the full text after all steps are done
        next
            build-parent of the version, i.e. the leftmost ancestor.
            Will be None if the record is not a delta.
    :param keys: The keys to build a map for
    :param allow_missing: If some records are missing, rather than
        error, just return the data that could be generated.
    """
    position_map = self._get_components_positions(
        keys, allow_missing=allow_missing)
    # Each position is (record_details, index_memo, build_parent); reading
    # only needs the key and its index_memo.
    read_requests = []
    for key, position in position_map.iteritems():
        record_details, index_memo, build_parent = position
        read_requests.append((key, index_memo))
    record_map = {}
    for key, record, digest in self._read_records_iter(read_requests):
        record_details, index_memo, build_parent = position_map[key]
        record_map[key] = (record, record_details, digest, build_parent)
    return record_map

1126

1127

def _split_by_prefix(self, keys):
    """Group keys by their first element.

    To keep memory pressure somewhat under control, split the
    requests back into per-file-id requests, otherwise "bzr co"
    extracts the full tree into memory before writing it to disk.
    This should be revisited if _get_content_maps() can ever cross
    file-id boundaries.

    :param keys: An iterable of key tuples
    :return: A dict of {prefix: [key_list]}.  Keys of length one are
        grouped under the prefix ''.
    """
    keys_by_prefix = {}
    for key in keys:
        if len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        keys_by_prefix.setdefault(prefix, []).append(key)
    return keys_by_prefix

1146

1147

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    Keys that neither this knit nor any fallback reports in its parent map
    are yielded as AbsentContentFactory objects.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.  If the index has no graph, 'unordered' is used whatever
        was asked for.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    :raises AssertionError: If ordering is not one of the two valid values.
    """
    # keys might be a generator
    keys = set(keys)
    if not keys:
        return
    if not self._index.has_graph:
        # Cannot topological order when no graph has been stored.
        ordering = 'unordered'

    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = {}
        for key, details in build_details.iteritems():
            positions[key] = self._build_details_to_components(details)

    # Some keys may need a fallback: either the key itself is not in this
    # knit, or the basis component of its delta chain is missing here.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict. key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            if key not in positions:
                needed_from_fallback.add(key)
                continue
            # the delta chain
            delta_chain = [key, positions[key][2]]
            result = True
            while delta_chain[-1] is not None:
                tail = delta_chain[-1]
                if tail in reconstructable_keys:
                    result = reconstructable_keys[tail]
                    break
                if tail not in positions:
                    # missing basis component
                    needed_from_fallback.add(tail)
                    result = True
                    break
                delta_chain.append(positions[tail][2])
            for chain_key in delta_chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)

    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    source_keys = []
    if ordering == 'topological':
        # Global topological sort
        present_keys = tsort.topo_sort(global_map)
        # Now group by source:
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    elif ordering == 'unordered':
        # Just group by source; remote sources first.
        present_keys = []
        for parent_map in reversed(parent_maps):
            keys_in_source = list(parent_map)
            source_keys.append((parent_map, keys_in_source))
            present_keys.extend(keys_in_source)
    else:
        raise AssertionError('valid values for ordering are:'
            ' "unordered" or "topological" not: %r'
            % (ordering,))

    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        prefix_split_keys = self._split_by_prefix(present_keys)
        prefix_split_non_local_keys = self._split_by_prefix(non_local_keys)
        for prefix, prefix_keys in prefix_split_keys.iteritems():
            non_local = set(prefix_split_non_local_keys.get(prefix, []))
            text_map, _ = self._get_content_maps(prefix_keys, non_local)
            for key in prefix_keys:
                text = ''.join(text_map.pop(key))
                yield FulltextContentFactory(key, global_map[key], None,
                                             text)
        return

    for source, keys_in_source in source_keys:
        if source is not parent_maps[0]:
            # One of the fallbacks: parent_maps[0] belongs to this knit, so
            # the fallbacks are offset by one.
            vf = self._fallback_vfs[parent_maps.index(source) - 1]
            for record in vf.get_record_stream(keys_in_source, ordering,
                                               include_delta_closure):
                yield record
            continue
        # this KnitVersionedFiles
        records = [(key, positions[key][1]) for key in keys_in_source]
        for key, raw_data, sha1 in self._read_records_iter_raw(records):
            (record_details, index_memo, _) = positions[key]
            yield KnitContentFactory(key, global_map[key],
                record_details, sha1, raw_data, self._factory.annotated,
                None)

1272

1273

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests come from this knit's own records where available; whatever
    is still missing is asked of each fallback in turn.

    :return: A dict of key to sha1 for the keys that were found.
    """
    wanted = set(keys)
    record_map = self._get_record_map(wanted, allow_missing=True)
    # The record map also holds delta-chain components that were not asked
    # for; record entry 2 is the 'digest'.
    sha1s = dict((key, details[2])
                 for key, details in record_map.iteritems()
                 if key in wanted)
    wanted -= set(sha1s)
    for fallback in self._fallback_vfs:
        if not wanted:
            break
        found = fallback.get_sha1s(wanted)
        sha1s.update(found)
        wanted -= set(found)
    return sha1s

1291

1292

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Knit records are copied across as raw bytes (annotations are stripped
    by an adapter when this knit is not annotated); every other storage
    kind is converted to a fulltext and added with add_lines().  The index
    entry of a delta whose basis parent is not yet in this knit is held
    back until that basis has really been added to the index.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: If the stream contains an absent record, or
        if it ends while a delta is still waiting for its basis parent.
    :seealso VersionedFiles.get_record_stream:
    """
    adapters = {}
    def get_adapter(adapter_key):
        # Adapters are created on first use and kept for the whole stream.
        # A KeyError from the registry is deliberately allowed to escape.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        parents = record.parents
        # Reset for every record so that the fulltext branches below never
        # see a stale value left over from an earlier knit record.
        buffered = False
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        if record.storage_kind in knit_types:
            if record.storage_kind not in native_types:
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                raw_bytes = adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind))
            else:
                raw_bytes = record.get_bytes_as(record.storage_kind)
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by making the kndx index support raise on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(raw_bytes))], raw_bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            if 'fulltext' not in options:
                basis_parent = parents[0]
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                if basis_parent not in self.get_parent_map([basis_parent]):
                    pending = buffered_index_entries.setdefault(
                        basis_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'fulltext':
            self.add_lines(record.key, parents,
                split_lines(record.get_bytes_as('fulltext')))
        else:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            lines = split_lines(adapter.get_bytes(
                record, record.get_bytes_as(record.storage_kind)))
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                pass
        # Add any records whose basis parent is now available.  A record
        # that was only buffered is not in the index yet, so nothing that
        # waits on it may be released.
        if not buffered:
            added_keys = [record.key]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries.pop(key)
                    self._index.add_records(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
    # If there were any deltas which had a missing basis parent, error.
    if buffered_index_entries:
        raise errors.RevisionNotPresent(list(buffered_index_entries)[0],
            self)

1404

1405

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \n
       terminators.
     * Lines are returned in arbitrary order.

    :return: An iterator over (line, key).
    :raises RevisionNotPresent: If some keys are found neither in this knit
        nor in any fallback; raised after the available lines were yielded.
    """
    if pb is None:
        pb = progress.DummyProgress()
    # Keys are removed from this set as they are located.
    remaining = set(keys)
    total = len(remaining)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    build_details = self._index.get_build_details(remaining)
    key_records = []
    for key, details in build_details.iteritems():
        if key not in remaining:
            continue
        key_records.append((key, details[0]))
        remaining.remove(key)
    for key_idx, found in enumerate(self._read_records_iter(key_records)):
        key, data, sha_value = found
        pb.update('Walking content.', key_idx, total)
        # No compression parent means the record is a fulltext, otherwise
        # it is a delta.
        if build_details[key][1] is None:
            parse_lines = self._factory.get_fulltext_content
        else:
            parse_lines = self._factory.get_linedelta_content
        # XXX: It might be more efficient to yield (key,
        # line_iterator) in the future. However for now, this is a simpler
        # change to integrate into the rest of the codebase. RBC 20071110
        for line in parse_lines(data):
            yield line, key
    for fallback in self._fallback_vfs:
        if not remaining:
            break
        seen_in_fallback = set()
        for line, key in fallback.iter_lines_added_or_present_in_keys(
                remaining):
            seen_in_fallback.add(key)
            yield line, key
        remaining.difference_update(seen_in_fallback)
    if remaining:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(remaining, repr(self))
    pb.update('Walking content.', total, total)

1467

1468

def _make_line_delta(self, delta_seq, new_content):
    """Generate a line delta from delta_seq and new_content.

    :param delta_seq: A matcher whose get_opcodes() yields
        (tag, i1, i2, j1, j2) entries.
    :param new_content: The content whose _lines fill in each hunk.
    :return: A list of (i1, i2, number_of_new_lines, new_lines) hunks, one
        for every opcode that is not 'equal'.
    """
    return [(op[1], op[2], op[4] - op[3], new_content._lines[op[3]:op[4]])
            for op in delta_seq.get_opcodes()
            if op[0] != 'equal']

1476

1477

def _merge_annotations(self, content, parents, parent_texts={},
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content and generate deltas.

    This is done by comparing the annotations based on changes to the text
    and generating a delta on the resulting full texts. If annotations are
    not being created then a simple delta is created.

    :param content: The new content; when annotated is true its _lines are
        updated in place.
    :param parents: The parent keys; the first one is the delta basis.
    :param parent_texts: Already-built contents, handed to _get_content().
    :param delta: If true a line delta against the first parent is
        returned.
    :param annotated: If true, (origin, text) pairs of matching lines are
        copied from each parent into content.
    :param left_matching_blocks: Optional precomputed matching blocks
        between the first parent and content.
    :return: The line delta when delta is true, otherwise None.
    """
    if left_matching_blocks is None:
        delta_seq = None
    else:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    if annotated:
        for parent_key in parents:
            merge_content = self._get_content(parent_key, parent_texts)
            # The prematched blocks only describe the first parent.
            if delta_seq is not None and parent_key == parents[0]:
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n != 0:
                    # this copies (origin, text) pairs across to the new
                    # content for any line that matches the last-checked
                    # parent.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        # XXX: Robert says the following block is a workaround for a
        # now-fixed bug and it can probably be deleted. -- mbp 20080618
        if content._lines and content._lines[-1][1][-1] != '\n':
            # The copied annotation was from a line without a trailing EOL,
            # reinstate one for the content object, to ensure correct
            # serialization.
            terminated = content._lines[-1][1] + '\n'
            content._lines[-1] = (content._lines[-1][0], terminated)
    if not delta:
        return None
    if delta_seq is None:
        reference_content = self._get_content(parents[0], parent_texts)
        new_texts = content.text()
        old_texts = reference_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(
            None, old_texts, new_texts)
    return self._make_line_delta(delta_seq, content)

1521

1522

def _parse_record(self, version_id, data):
    """Parse an original format knit record.

    These have the last element of the key only present in the stored data.

    :param version_id: The version id the record header is checked against.
    :param data: The raw record, as accepted by _parse_record_unchecked().
    :return: (record_contents, digest) where digest is the fourth field of
        the record header.
    """
    header, contents = self._parse_record_unchecked(data)
    self._check_header_version(header, version_id)
    digest = header[3]
    return contents, digest

1530

1531

def _parse_record_header(self, key, raw_data):

1532

"""Parse a record header for consistency.

1533

1534

:return: the header and the decompressor stream.

1535

as (stream, header_record)

1536

"""

1537

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1538

try:

1539

# Current serialise

1540

rec = self._check_header(key, df.readline())

1541

except Exception, e:

1542

raise KnitCorrupt(self,

1543

"While reading {%s} got %s(%s)"

1544

% (key, e.__class__.__name__, str(e)))

1545

return df, rec

1546

1547

def _parse_record_unchecked(self, data):

1548

# profiling notes:

1549

# 4168 calls in 2880 217 internal

1550

# 4168 calls to _parse_record_header in 2121

1551

# 4168 calls to readlines in 330

1552

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1553

try:

1554

record_contents = df.readlines()

1555

except Exception, e:

1556

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1557

(data, e.__class__.__name__, str(e)))

1558

header = record_contents.pop(0)

1559

rec = self._split_header(header)

1560

last_line = record_contents.pop()

1561

if len(record_contents) != int(rec[2]):

1562

raise KnitCorrupt(self,

1563

'incorrect number of lines %s != %s'

1564

' for version {%s} %s'

1565

% (len(record_contents), int(rec[2]),

1566

rec[1], record_contents))

1567

if last_line != 'end %s\n' % rec[1]:

1568

raise KnitCorrupt(self,

1569

'unexpected version end line %r, wanted %r'

1570

% (last_line, rec[1]))

1571

df.close()

1572

return rec, record_contents

1573

1574

def _read_records_iter(self, records):
    """Read text records from data file and yield result.

    The result will be returned in whatever is the fastest to read.
    Not by the order requested. Also, multiple requests for the same
    record will only yield 1 response.

    :param records: A list of (key, access_memo) entries
    :return: Yields (key, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    # Drop duplicate requests and order what is left by access memo.
    # XXX: This smells wrong, IO may not be getting ordered right.
    needed_records = sorted(set(records), key=operator.itemgetter(1))
    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    memos = [index_memo for key, index_memo in needed_records]
    raw_data = self._access.get_raw_records(memos)

    for request, data in izip(iter(needed_records), raw_data):
        key = request[0]
        content, digest = self._parse_record(key[-1], data)
        yield key, content, digest

1601

1602

def _read_records_iter_raw(self, records):
    """Read text records from data file and yield raw data.

    This unpacks enough of the text record to validate the id is
    as expected but that's all.

    :param records: A sized sequence of (key, access_memo) entries.
    :return: Each item the iterator yields is
        (key, bytes, sha1_of_full_text), in the order requested.
    """
    if not len(records):
        return
    # setup an iterator of the external records:
    # uses readv so nice and fast we hope.
    # grab the disk data needed.
    needed_offsets = [index_memo for key, index_memo in records]
    raw_records = self._access.get_raw_records(needed_offsets)

    for key, index_memo in records:
        data = raw_records.next()
        # validate the header (note that we can only use the suffix in
        # current knit records).
        stream, header = self._parse_record_header(key, data)
        stream.close()
        yield key, data, header[3]

1625

1626

def _record_to_data(self, key, digest, lines, dense_lines=None):
    """Convert key, digest, lines into a raw data block.

    :param key: The key of the record. Currently keys are always serialised
        using just the trailing component.
    :param digest: The digest written into the record header.
    :param lines: The lines of the record; the last one must end in \n.
    :param dense_lines: The bytes of lines but in a denser form. For
        instance, if lines is a list of 1000 bytestrings each ending in \n,
        dense_lines may be a list with one line in it, containing all the
        1000's lines and their \n's. Using dense_lines if it is already
        known is a win because the string join to create bytes in this
        function spends less time resizing the final string.
    :return: (len, data) where data is the compressed record produced by
        tuned_gzip.bytes_to_gzip and len is its length.
    :raises AssertionError: If the serialised record is not a plain str.
    :raises ValueError: If the last line does not end in \n.
    """
    # Note: using a string copy here increases memory pressure with e.g.
    # ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine
    # when doing the initial commit of a mozilla tree. RBC 20070921
    header = "version %s %d %s\n" % (key[-1], len(lines), digest)
    footer = "end %s\n" % key[-1]
    body = dense_lines or lines
    bytes = ''.join(chain([header], body, [footer]))
    if type(bytes) is not str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(bytes))
    if lines and lines[-1][-1] != '\n':
        raise ValueError('corrupt lines value %r' % lines)
    compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)
    return len(compressed_bytes), compressed_bytes

1655

1656

def _split_header(self, line):
    """Split a record header line into its four whitespace-separated fields.

    :param line: The header line of a knit record.
    :return: The list of the four fields.
    :raises KnitCorrupt: If the line does not have exactly four fields.
    """
    fields = line.split()
    if len(fields) == 4:
        return fields
    raise KnitCorrupt(self,
                      'unexpected number of elements in record header')

1662

1663

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of this knit's index and of every
        fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1672

1673

1674

1675

class _KndxIndex(object):

1676

"""Manages knit index files

1677

1678

The index is kept in memory and read on startup, to enable

1679

fast lookups of revision information. The cursor of the index

1680

file is always pointing to the end, making it easy to append

1681

entries.

1682

1683

_cache is a cache for fast mapping from version id to a Index

1684

object.

1685

1686

_history is a cache for fast mapping from indexes to version ids.

1687

1688

The index data format is dictionary compressed when it comes to

1689

parent references; an index entry may only have parents with a

1690

lower index number. As a result, the index is topologically sorted.

1691

1692

Duplicate entries may be written to the index for a single version id

1693

if this is done then the latter one completely replaces the former:

1694

this allows updates to correct version and parent information.

1695

Note that the two entries may share the delta, and that successive

1696

annotations and references MUST point to the first entry.

1697

1698

The index file on disc contains a header, followed by one line per knit

1699

record. The same revision can be present in an index file more than once.

1700

The first occurrence gets assigned a sequence number starting from 0.

1701

1702

The format of a single line is

1703

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1704

REVISION_ID is a utf8-encoded revision id

1705

FLAGS is a comma separated list of flags about the record. Values include

1706

no-eol, line-delta, fulltext.

1707

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1708

that the compressed data starts at.

1709

LENGTH is the ascii representation of the length of the data file.

1710

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1711

REVISION_ID.

1712

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1713

revision id already in the knit that is a parent of REVISION_ID.

1714

The ' :' marker is the end of record marker.

1715

1716

partial writes:

1717

when a write is interrupted to the index file, it will result in a line

1718

that does not end in ' :'. If the ' :' is not present at the end of a line,

1719

or at the end of the file, then the record that is missing it will be

1720

ignored by the parser.

1721

1722

When writing new records to the index file, the data is preceded by '\n'

1723

to ensure that records always start on new lines even if the last write was

1724

interrupted. As a result it's normal for the last line in the index to be

1725

missing a trailing newline. One can be added with no harmful effects.

1726

1727

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

1728

where prefix is e.g. the (fileid,) for .texts instances or () for

1729

constant-mapped things like .revisions, and the old state is

1730

tuple(history_vector, cache_dict). This is used to prevent having an

1731

ABI change with the C extension that reads .kndx files.

1732

"""

1733

1734

HEADER = "# bzr knit index 8\n"

1735

1736

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
    """Create a _KndxIndex on transport using mapper.

    Every argument is stored on the instance under the same name with a
    leading underscore, then the cache is reset.

    :param transport: Stored as self._transport.
    :param mapper: Stored as self._mapper.
    :param get_scope: Stored as self._get_scope.
    :param allow_writes: Stored as self._allow_writes.
    :param is_locked: Stored as self._is_locked.
    """
    self._transport, self._mapper = transport, mapper
    self._get_scope = get_scope
    self._allow_writes, self._is_locked = allow_writes, is_locked
    # The cache is reset only once all the collaborators are in place.
    self._reset_cache()
    self.has_graph = True

1745

1746

def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx path their key maps to and each path
    is written in sorted path order.  While a path is being processed the
    in-memory cache for its prefix is updated; if anything goes wrong the
    cache for that prefix is put back the way it was and the error is
    re-raised.

    :param records: a list of tuples:
        (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    """
    by_path = {}
    for record in records:
        record_key = record[0]
        index_path = self._mapper.map(record_key) + '.kndx'
        if index_path not in by_path:
            # Remember the prefix of the first record seen for this path.
            by_path[index_path] = (record_key[:-1], [])
        by_path[index_path][1].append(record)
    for path in sorted(by_path):
        prefix, path_records = by_path[path]
        self._load_prefixes([prefix])
        # Snapshot the cached state so a failed write can be undone.
        saved_history = self._kndx_cache[prefix][1][:]
        saved_cache = self._kndx_cache[prefix][0].copy()
        new_lines = []
        try:
            for key, options, (_, pos, size), parents in path_records:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                compressed_parents = self._dictionary_compress(parents)
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    compressed_parents)
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                new_lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if saved_history:
                # The index already had content: append to it.
                self._transport.append_bytes(path, ''.join(new_lines))
            else:
                # Nothing was known for this prefix: write header + lines.
                self._init_index(path, new_lines)
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._kndx_cache[prefix] = (saved_cache, saved_history)
            raise

1789

1790

def _cache_key(self, key, options, pos, size, parent_keys):
    """Cache a version record in the history array and index cache.

    This is inlined into _load_data for performance. KEEP IN SYNC.
    (It saves 60ms, 25% of the __init__ overhead on local 4000 record
    indexes).

    The cache entry stored for a version is the tuple
    (version_id, options, pos, size, parents, index), where parents holds
    only the last element of each parent key and index is the position
    of the version's first appearance in the history list.

    :raises ValueError: if a parent key has a different prefix than key.
    """
    prefix, version_id = key[:-1], key[-1]
    # last-element only for compatibility with the C load_data.
    parents = tuple(parent[-1] for parent in parent_keys)
    for parent in parent_keys:
        if parent[:-1] != prefix:
            raise ValueError("mismatched prefixes for %r, %r" % (
                key, parent_keys))
    cache, history = self._kndx_cache[prefix]
    # The history index must keep pointing at the first index entry seen
    # for version_id, so an existing entry's index is reused.
    try:
        index = cache[version_id][5]
    except KeyError:
        index = len(history)
        history.append(version_id)
    cache[version_id] = (version_id, options, pos, size, parents, index)

1819

1820

def check_header(self, fp):
    """Read the first line of fp and verify it is the index header.

    :param fp: A file-like object positioned at the start of an index.
    :raises errors.NoSuchFile: if nothing could be read; an empty file is
        treated as though the file doesn't exist yet.
    :raises KnitHeaderError: if the first line is not self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        raise errors.NoSuchFile(self)
    if first_line == self.HEADER:
        return
    raise KnitHeaderError(badline=first_line, filename=self)

1828

1829

def _check_read(self):
    """Raise if the index is not locked; drop the cache on scope change.

    :raises errors.ObjectNotLocked: if self._is_locked() is false.
    """
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)
    current_scope = self._get_scope()
    if current_scope != self._scope:
        # The cached data belongs to a different scope: start afresh.
        self._reset_cache()

1834

1835

def _check_write_ok(self):
    """Raise if writes are not permitted.

    The cache is reset first when the scope has changed, which also
    recomputes the mode that is then checked.

    :raises errors.ObjectNotLocked: if self._is_locked() is false.
    :raises errors.ReadOnlyObjectDirtiedError: if the mode is not 'w'.
    """
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)
    current_scope = self._get_scope()
    if current_scope != self._scope:
        self._reset_cache()
    writable = (self._mode == 'w')
    if not writable:
        raise errors.ReadOnlyObjectDirtiedError(self)

1843

1844

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.  It may be a one-shot iterable
        such as a generator; it is materialised once up front.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked more than once (by get_parent_map and by the loop
    # below), so take a list to stay correct for generators.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue  # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # A delta is built against the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

1880

1881

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta', whichever appears in the key's
        options ('fulltext' wins if both are present).
    :raises errors.KnitIndexUnknownMethod: if neither option is present.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

1890

1891

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    The index for the key's prefix is loaded first if necessary.

    :raises RevisionNotPresent: if the key is not in the index.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    try:
        options = self._kndx_cache[prefix][0][suffix][1]
    except KeyError:
        raise RevisionNotPresent(key, self)
    return options

1902

1903

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.  Any iterable is
        accepted, including one-shot iterables such as generators.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice (once to collect prefixes, once to build the
    # result); materialise it so a generator is not exhausted by the
    # first pass.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    self._load_prefixes(set(key[:-1] for key in keys))
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # Not present in the index: leave it out of the mapping.
            continue
        # The cache stores only the last element of each parent key;
        # rebuild full keys by re-attaching the shared prefix.
        result[key] = tuple(prefix + (suffix,)
                            for suffix in suffix_parents)
    return result

1926

1927

def get_position(self, key):
    """Return details needed to access the version.

    The index for the key's prefix is loaded first if necessary.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    cache = self._kndx_cache[prefix][0]
    # Cache entries are (version_id, options, pos, size, parents, index).
    pos, size = cache[suffix][2:4]
    return key, pos, size

1937

1938

def _init_index(self, path, extra_lines=()):
    """Initialize an index.

    Writes self.HEADER followed by extra_lines to path, creating the
    parent directory if needed.

    :param path: Transport-relative path of the index file to write.
    :param extra_lines: Optional iterable of already-formatted record
        lines to write straight after the header.  The default is an
        immutable empty tuple rather than a shared mutable list.
    """
    sio = StringIO()
    sio.write(self.HEADER)
    sio.writelines(extra_lines)
    sio.seek(0)
    # TODO: file and directory modes are not passed through here.
    self._transport.put_file_non_atomic(path, sio,
                                        create_parent_dir=True)

1949

1950

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples, each a prefix plus a version suffix
        taken from that prefix's history list.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension so each relative path is only seen once.
        relpaths = set(
            os.path.splitext(quoted_relpath)[0]
            for quoted_relpath in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    found = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        found.update(prefix + (suffix,) for suffix in history)
    return found

1971

1972

def _load_prefixes(self, prefixes):
    """Load the indices for prefixes.

    Prefixes already in self._kndx_cache are skipped.  For each other
    prefix the matching .kndx file is parsed with _load_data and the
    result is stored as (cache_dict, history_list).  A missing file gives
    an empty entry; when the mapper is exactly a ConstantMapper the
    missing index file is also created.

    :param prefixes: An iterable of key prefixes (tuples).
    """
    self._check_read()
    for prefix in prefixes:
        if prefix in self._kndx_cache:
            continue
        # the load_data interface writes to these variables.
        self._cache = {}
        self._history = []
        self._filename = prefix
        try:
            try:
                path = self._mapper.map(prefix) + '.kndx'
                fp = self._transport.get(path)
                try:
                    # _load_data may raise NoSuchFile if the target knit
                    # is completely empty.
                    _load_data(self, fp)
                finally:
                    fp.close()
                self._kndx_cache[prefix] = (self._cache, self._history)
            except NoSuchFile:
                self._kndx_cache[prefix] = ({}, [])
                if type(self._mapper) == ConstantMapper:
                    # preserve behaviour for revisions.kndx etc.
                    self._init_index(path)
        finally:
            # Always remove the scratch attributes, including when the
            # load fails with an unexpected error, so no stale state is
            # left on the instance.
            del self._cache
            del self._filename
            del self._history

2002

2003

def _partition_keys(self, keys):
    """Turn keys into a dict of prefix:suffix_list.

    :param keys: An iterable of key tuples.
    :return: A dict mapping each key prefix (key[:-1]) to the list of
        suffixes (key[-1]) seen for it, in input order.
    """
    partitioned = {}
    for key in keys:
        prefix, suffix = key[:-1], key[-1]
        if prefix not in partitioned:
            partitioned[prefix] = []
        partitioned[prefix].append(suffix)
    return partitioned

2010

2011

def _dictionary_compress(self, keys):
    """Dictionary compress keys.

    The cache for the prefix of the first key must already be loaded.

    :param keys: The keys to generate references to.
    :return: A string representation of keys. keys which are present are
        dictionary compressed, and others are emitted as fulltext with a
        '.' prefix.
    :raises ValueError: if the keys do not all share the same prefix.
    """
    if not keys:
        return ''
    result_list = []
    prefix = keys[0][:-1]
    cache = self._kndx_cache[prefix][0]
    for key in keys:
        if key[:-1] != prefix:
            # kndx indices cannot refer across partitioned storage.
            # keys is wrapped in a 1-tuple so that a tuple of keys is
            # formatted as one value instead of being unpacked by '%'.
            raise ValueError("mismatched prefixes for %r" % (keys,))
        suffix = key[-1]
        if suffix in cache:
            # -- inlined lookup() --
            result_list.append(str(cache[suffix][5]))
            # -- end lookup () --
        else:
            result_list.append('.' + suffix)
    return ' '.join(result_list)

2035

2036

def _reset_cache(self):
    """Discard all parsed index data and re-read the scope and mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    # Read-only unless the allow_writes callback says otherwise.
    mode = 'r'
    if self._allow_writes():
        mode = 'w'
    self._mode = mode

2046

2047

def _split_key(self, key):
    """Split key into a prefix and suffix.

    :return: (key[:-1], key[-1]).
    """
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix

2050

2051

2052

class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex.

    Nodes handled here are tuples (index, key, value, node_refs).  The
    value string is a one-character flag ('N' for no-eol, ' ' otherwise)
    followed by "<pos> <size>".  node_refs holds the parents reference
    list and, when deltas are enabled, a second list holding the
    compression parent.
    """

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
                 add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked
            and thus usable; it is consulted before answering queries.
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and
            call this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :raises KnitCorrupt: if deltas is requested without parents.
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                              "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._graph_index)

    def add_records(self, records, random_id=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
            (key, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: for a line-delta in a non-delta index, parents
            in a parentless index, or a record that conflicts with an
            entry already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        for (key, options, access_memo, parents) in records:
            if self._parents:
                parents = tuple(parents)
            # The first element of the memo is not stored in the value.
            _, pos, size = access_memo
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(
                        self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # Deltas are built against the first parent.
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                                      "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        # check for dups
        if not random_id:
            # _get_entries copies the key set before yielding, so removing
            # entries from keys while iterating is safe.
            present_nodes = self._get_entries(keys)
            for (_, key, value, node_refs) in present_nodes:
                # Only the no-eol flag and the references are compared.
                if (value[0] != keys[key][0][0] or
                        node_refs != keys[key][1]):
                    raise KnitCorrupt(self, "inconsistent details in add_records"
                                      ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        if self._parents:
            result = [(key, keys[key][0], keys[key][1]) for key in keys]
        else:
            result = [(key, keys[key][0]) for key in keys]
        self._add_callback(result)

    def _check_read(self):
        """raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Assert if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _compression_parent(self, an_entry):
        """Return the key that an_entry is compressed against, or None.

        :raises AssertionError: if more than one compression parent is
            recorded.
        """
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        if len(compression_parents) != 1:
            # Wrapped in a 1-tuple: compression_parents is itself a
            # sequence and must be formatted as a single value.
            raise AssertionError(
                "Too many compression parents: %r" % (compression_parents,))
        return compression_parents[0]

    def get_build_details(self, keys):
        """Get the method, index_memo and compression parent for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        for entry in self._get_entries(keys, False):
            key = entry[1]
            if self._parents:
                parents = entry[3][0]
            else:
                parents = ()
            if self._deltas:
                compression_parent_key = self._compression_parent(entry)
            else:
                compression_parent_key = None
            noeol = (entry[2][0] == 'N')
            if compression_parent_key:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[key] = (self._node_to_position(entry),
                           compression_parent_key, parents,
                           (method, noeol))
        return result

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator of (index, key, value, node_refs) tuples; for
        a parentless index node_refs is always ().

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            found entries have been yielded and some key was not found.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def get_method(self, key):
        """Return compression method of specified key."""
        return self._get_method(self._get_node(key))

    def _get_method(self, node):
        """Return 'line-delta' if node has a compression parent, else
        'fulltext' (always 'fulltext' when deltas are disabled)."""
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        return 'fulltext'

    def _get_node(self, key):
        """Return the single index entry for key.

        :raises RevisionNotPresent: if the key is not in the index.
        """
        try:
            return list(self._get_entries([key]))[0]
        except IndexError:
            raise RevisionNotPresent(key, self)

    def get_options(self, key):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(key)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.  For a parentless index every present key maps
            to None.
        """
        self._check_read()
        result = {}
        for node in self._get_entries(keys):
            if self._parents:
                result[node[1]] = node[3][0]
            else:
                result[node[1]] = None
        return result

    def get_position(self, key):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        return self._node_to_position(self._get_node(key))

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        # Skip the leading flag character; the rest is "<pos> <size>".
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

2301

2302

2303

class _KnitKeyAccess(object):
    """Access to records in .knit files."""

    def __init__(self, transport, mapper):
        """Create a _KnitKeyAccess with transport and mapper.

        :param transport: The transport the access object is rooted at.
        :param mapper: The mapper used to map keys to .knit files.
        """
        self._mapper = mapper
        self._transport = transport

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        Each raw data item is appended to the .knit file its key maps to.
        If the append reports a missing file, the parent directory is
        created and the append is retried once.

        :param key_sizes: An iterable of tuples containing the key and
            size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _KnitKeyAccess the memo is (key, pos,
            length), where the key is the record key.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        memos = []
        start = 0
        # TODO: This can be tuned for writing to sftp and other servers where
        # append() is relatively expensive by grouping the writes to each key
        # prefix.
        for key, size in key_sizes:
            end = start + size
            segment = raw_data[start:end]
            path = self._mapper.map(key)
            try:
                base = self._transport.append_bytes(path + '.knit', segment)
            except errors.NoSuchFile:
                self._transport.mkdir(osutils.dirname(path))
                base = self._transport.append_bytes(path + '.knit', segment)
            # if base == 0:
            # chmod.
            start = end
            memos.append((key, base, size))
        return memos

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for records.

        :param memos_for_retrieval: An iterable containing the access memo for
            retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group runs of consecutive memos that share a key
        # prefix into one request, to minimise readv's issued.
        request_lists = []
        for (key, offset, length) in memos_for_retrieval:
            prefix = key[:-1]
            if request_lists and request_lists[-1][0] == prefix:
                request_lists[-1][1].append((offset, length))
            else:
                request_lists.append((prefix, [(offset, length)]))
        # second pass, one readv per grouped request.
        for prefix, read_vector in request_lists:
            path = self._mapper.map(prefix) + '.knit'
            for pos, data in self._transport.readv(path, read_vector):
                yield data

2376

2377

2378

class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation."""

    def __init__(self, index_to_packs):
        """Create a _DirectPackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        """
        self._indices = index_to_packs
        # No writer until set_writer() is called.
        self._container_writer = None
        self._write_index = None

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param key_sizes: An iterable of tuples containing the key and
            size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _DirectPackAccess the memo is (index, pos,
            length), where the index field is the write_index object supplied
            to the PackAccess object.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        memos = []
        start = 0
        for key, size in key_sizes:
            end = start + size
            p_offset, p_length = self._container_writer.add_bytes_record(
                raw_data[start:end], [])
            start = end
            memos.append((self._write_index, p_offset, p_length))
        return memos

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        grouped = []
        active_index = None
        for (index, offset, length) in memos_for_retrieval:
            if active_index == index:
                active_ranges.append((offset, length))
                continue
            if active_index is not None:
                grouped.append((active_index, active_ranges))
            active_index = index
            active_ranges = [(offset, length)]
        # handle the last entry
        if active_index is not None:
            grouped.append((active_index, active_ranges))
        for index, offsets in grouped:
            transport, path = self._indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data.

        :param writer: The container writer that add_raw_records feeds.
        :param index: The index object placed in memos returned by
            add_raw_records; when not None it is also registered in the
            index-to-pack map.
        :param transport_packname: The value stored for index in that map.
        """
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index

2452

2453

2454

# Deprecated, use PatienceSequenceMatcher instead

2455

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2456

2457

2458

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :return: An iterator over whatever _KnitAnnotator.annotate() produces
        for revision_id.
    """
    annotated = _KnitAnnotator(knit).annotate(revision_id)
    return iter(annotated)

2467

2468

2469

class _KnitAnnotator(object):

2470

"""Build up the annotations for a text."""

2471

2472

def __init__(self, knit):
    """Set up empty bookkeeping for annotating texts stored in knit."""
    self._knit = knit

    # Content objects keyed by revision id.  These differ from fulltexts
    # because of how final newlines are treated by knits: the content
    # objects here will always have a final newline.
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Raw data for nodes that could not be processed yet: maps the
    # revision_id of a missing base to the list of children waiting to be
    # annotated from it.
    self._pending_children = {}

    # Nodes which cannot be extracted
    self._ghosts = set()

    # Children still depending on each node, so we know whether the node
    # has to be kept around.
    self._annotate_children = {}
    self._compression_children = {}

    # Build details gathered for every node looked up so far.
    self._all_build_details = {}
    # The children => parent revision_id graph
    self._revision_id_graph = {}

    self._heads_provider = None

    self._nodes_to_keep_annotations = set()
    self._generations_until_keep = 100

2504

2505

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new generation threshold.
    """
    generations = value
    self._generations_until_keep = generations

2512

2513

def _add_fulltext_content(self, revision_id, content_obj):
    """Remember content_obj for revision_id and return its text().

    :param revision_id: The key the content object is stored under.
    :param content_obj: An object providing a text() method.
    :return: The result of content_obj.text().
    """
    self._fulltext_contents[revision_id] = content_obj
    # TODO: jam 20080305 It might be good to check the sha1digest here
    text = content_obj.text()
    return text

2517

2518

def _check_parents(self, child, nodes_to_annotate):
    """Check if all parents have been processed.

    :param child: A tuple of (rev_id, parents, raw_content)
    :param nodes_to_annotate: If child is ready, add it to
        nodes_to_annotate, otherwise put it back in self._pending_children
    """
    annotated = self._annotated_lines
    for parent_id in child[1]:
        if parent_id in annotated:
            continue
        # This parent has not been annotated yet: park the child under it
        # (only under the first such parent) and stop looking.
        waiting = self._pending_children.setdefault(parent_id, [])
        waiting.append(child)
        return
    # Every parent is annotated, so this one is ready to be processed
    nodes_to_annotate.append(child)

2534

2535

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    Once the new annotation is stored, revision_id is removed from each
    parent's list of waiting children; a parent with no children left
    that is not marked to be kept has its cached data dropped.

    :return: A list of children that now have their parents satisfied.
    """
    parent_annotations = [self._annotated_lines[p] for p in parent_ids]
    reannotated = annotate.reannotate(
        parent_annotations, fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider())
    self._annotated_lines[revision_id] = list(reannotated)
    for p in parent_ids:
        ann_children = self._annotate_children[p]
        ann_children.remove(revision_id)
        if ann_children or p in self._nodes_to_keep_annotations:
            # Still needed by another child, or marked to be kept.
            continue
        del self._annotated_lines[p]
        del self._all_build_details[p]
        if p in self._fulltext_contents:
            del self._fulltext_contents[p]
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, ready)
    return ready

2563

2564

def _get_build_graph(self, key):
    """Get the graphs for building texts and annotations.

    The data you need for creating a full text may be different than the
    data you need to annotate that text. (At a minimum, you need both
    parents to create an annotation, but only need 1 parent to generate the
    fulltext.)

    Starting from key, the index is queried one generation of parents at
    a time.  Keys the index returns no details for are recorded as
    ghosts.

    :param key: The key whose ancestry should be explored.
    :return: A list of (key, index_memo) records, suitable for
        passing to read_records_iter to start reading in the raw data from
        the pack file.
    :raises KnitCorrupt: if a ghost is used as a compression parent.
    """
    if key in self._annotated_lines:
        # Nothing to do
        return []
    pending = set([key])
    records = []
    generation = 0
    kept_generation = 0
    while pending:
        # get all pending nodes
        generation += 1
        this_iteration = pending
        build_details = self._knit._index.get_build_details(this_iteration)
        self._all_build_details.update(build_details)
        pending = set()
        # node_key rather than key, so the parameter is not shadowed.
        for node_key in build_details:
            (index_memo, compression_parent, parents,
             record_details) = build_details[node_key]
            self._revision_id_graph[node_key] = parents
            records.append((node_key, index_memo))
            # Do we actually need to check _annotated_lines?
            pending.update(p for p in parents
                           if p not in self._all_build_details)
            if compression_parent:
                self._compression_children.setdefault(
                    compression_parent, []).append(node_key)
            if parents:
                for parent in parents:
                    self._annotate_children.setdefault(
                        parent, []).append(node_key)
                num_gens = generation - kept_generation
                if ((num_gens >= self._generations_until_keep)
                        and len(parents) > 1):
                    # A merge node far enough from the last kept one:
                    # mark it so its annotations are retained.
                    kept_generation = generation
                    self._nodes_to_keep_annotations.add(node_key)

        missing_versions = this_iteration.difference(build_details)
        self._ghosts.update(missing_versions)
        for missing_version in missing_versions:
            # add a key, no parents
            self._revision_id_graph[missing_version] = ()
            pending.discard(missing_version)  # don't look for it
    if self._ghosts.intersection(self._compression_children):
        # KnitCorrupt is given (object, message) everywhere else in this
        # file; the knit is passed as the object here.
        # NOTE(review): confirm self._knit is the preferred first argument.
        raise KnitCorrupt(
            self._knit,
            "We cannot have nodes which have a ghost compression parent:\n"
            "ghosts: %r\n"
            "compression children: %r"
            % (self._ghosts, self._compression_children))
    # Cleanout anything that depends on a ghost so that we don't wait for
    # the ghost to show up
    for node in self._ghosts:
        if node in self._annotate_children:
            # We won't be building this node
            del self._annotate_children[node]
    # Generally we will want to read the records in reverse order, because
    # we find the parent nodes after the children
    records.reverse()
    return records

2634

2635

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    Records are handled in the order ``self._knit._read_records_iter``
    yields them rather than in a strict requested order.  Anything that
    can be annotated right away is processed; a record that still needs
    parents is handed to ``self._check_parents`` and is picked up again
    once it shows up on the work stack.

    :param records: Passed unchanged to ``self._knit._read_records_iter``.
    """
    for rev_id, record, digest in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            # This text has already been annotated.
            continue
        # Ghost parents will never be built, so they are not waited for.
        live_parents = [parent for parent in self._revision_id_graph[rev_id]
                        if parent not in self._ghosts]
        (index_memo, compression_parent, parents,
         record_details) = self._all_build_details[rev_id]
        work_stack = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if live_parents:
            # Check if all the parents are present
            self._check_parents((rev_id, live_parents, record), work_stack)
        else:
            # No (non-ghost) parents for this node, so just add it.
            # TODO: This probably needs to be decoupled
            fulltext_content, delta = self._knit._factory.parse_record(
                rev_id, record, record_details, None)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content)
            work_stack.extend(self._add_annotation(
                rev_id, fulltext, live_parents, left_matching_blocks=None))
        # Drain everything that became processable.  _add_annotation returns
        # more (rev_id, parent_ids, record) entries -- presumably the children
        # that were waiting on the node just annotated; confirm against
        # _add_annotation.
        while work_stack:
            # Should we use a queue here instead of a stack?
            node_id, node_parents, node_record = work_stack.pop()
            (index_memo, compression_parent, parents,
             record_details) = self._all_build_details[node_id]
            if compression_parent is None:
                # Stored as a fulltext: nothing to apply a delta against.
                fulltext_content = self._knit._factory.parse_fulltext(
                    node_record, node_id)
                fulltext = self._add_fulltext_content(node_id,
                                                      fulltext_content)
                blocks = None
            else:
                comp_children = self._compression_children[compression_parent]
                if node_id not in comp_children:
                    raise AssertionError("%r not in compression children %r"
                                         % (node_id, comp_children))
                # If there is only 1 child, it is safe to reuse this
                # content
                reuse_content = (
                    len(comp_children) == 1
                    and compression_parent
                    not in self._nodes_to_keep_annotations)
                if reuse_content:
                    # Remove it from the cache since it will be changing
                    base_content = self._fulltext_contents.pop(
                        compression_parent)
                    # Make sure to copy the fulltext since it might be
                    # modified
                    base_lines = list(base_content.text())
                else:
                    base_content = self._fulltext_contents[compression_parent]
                    base_lines = base_content.text()
                comp_children.remove(node_id)
                fulltext_content, delta = self._knit._factory.parse_record(
                    node_id, node_record, record_details, base_content,
                    copy_base_content=(not reuse_content))
                fulltext = self._add_fulltext_content(node_id,
                                                      fulltext_content)
                blocks = KnitContent.get_line_delta_blocks(
                    delta, base_lines, fulltext)
            work_stack.extend(self._add_annotation(
                node_id, fulltext, node_parents,
                left_matching_blocks=blocks))

2708

2709

def _get_heads_provider(self):
    """Return the heads provider for resolving ancestry issues.

    The provider is built on first use as a ``FrozenHeadsCache`` over a
    ``Graph`` whose parents come from ``self._revision_id_graph``.  It is
    stored on ``self._heads_provider`` and the same object is returned on
    every later call.
    """
    provider = self._heads_provider
    if provider is None:
        parents_source = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        provider = _mod_graph.FrozenHeadsCache(
            _mod_graph.Graph(parents_source))
        self._heads_provider = provider
    return provider

2719

2720

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    :param key: The key to annotate.
    :return: Whatever ``self._simple_annotate(key)`` returns; see the note
        below on why the other path is currently never taken.
    """
    # NOTE: the leading ``True`` short-circuits the ``or``, so the fallback
    # check is never evaluated and every call takes the slow path.  The
    # fast path after this branch is kept but is presently unreachable.
    force_simple_path = True or len(self._knit._fallback_vfs) > 0
    if force_simple_path:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    build_records = self._get_build_graph(key)
    if key in self._ghosts:
        raise errors.RevisionNotPresent(key, self._knit)
    self._annotate_records(build_records)
    return self._annotated_lines[key]

2733

2734

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The list built from ``annotate.reannotate`` for ``key``.
    :raises errors.RevisionNotPresent: if no annotation was produced for
        ``key`` (it did not appear in the record stream).
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    # Collect every present ancestor of key; ghosts are reported by the
    # searcher but deliberately not added.
    keys = set()
    while True:
        try:
            present, ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name here: rebinding ``key`` would make the final
        # lookup (and the error raised for a missing text) depend on which
        # record the stream happened to yield last.
        record_key = record.key
        fulltext = split_lines(record.get_bytes_as('fulltext'))
        parents = parent_map[record_key]
        if parents is not None:
            # Ghost parents are never in ``keys`` and so never get cached;
            # skip them (as _annotate_records does) instead of failing with
            # a bare KeyError.
            parent_lines = [parent_cache[parent] for parent in parents
                            if parent in parent_cache]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

2770

2771

2772

try:

2773

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2774

except ImportError:

2775

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »