        return KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)

    def get_data_stream(self, required_versions):
        """Get a data stream for the specified versions.

        Versions may be returned in any order, not necessarily the order
        specified. They are returned in a partial order by compression
        parent, so that the deltas can be applied as the data stream is
        inserted; however note that compression parents will not be sent
        unless they were specifically requested, as the client may already
        have them.

        :param required_versions: The exact set of versions to be extracted.
            Unlike some other knit methods, this is not used to generate a
            transitive closure, rather it is used precisely as given.

        :returns: format_signature, list of (version, options, length, parents),
            reader_callable.
        """
        required_version_set = frozenset(required_versions)
        version_index = {}
        # list of revisions that can just be sent without waiting for their
        # compression parent
        ready_to_send = []
        # map from revision to the children based on it
        deferred = {}
        # first, read all relevant index data, enough to sort into the right
        # order to return
        for version_id in required_versions:
            options = self._index.get_options(version_id)
            parents = self._index.get_parents_with_ghosts(version_id)
            index_memo = self._index.get_position(version_id)
            version_index[version_id] = (index_memo, options, parents)
            if ('line-delta' in options
                and parents[0] in required_version_set):
                # must wait until the parent has been sent
                deferred.setdefault(parents[0], []).append(version_id)
            else:
                # either a fulltext, or a delta whose parent the client did
                # not ask for and presumably already has
                ready_to_send.append(version_id)
        # build a list of results to return, plus instructions for data to
        # read from the file
        copy_queue_records = []
        temp_version_list = []
        while ready_to_send:
            # XXX: pushing and popping lists may be a bit inefficient
            version_id = ready_to_send.pop(0)
            (index_memo, options, parents) = version_index[version_id]
            copy_queue_records.append((version_id, index_memo))
            none, data_pos, data_size = index_memo
            temp_version_list.append((version_id, options, data_size,
                parents))
            if version_id in deferred:
                # now we can send all the children of this revision - we could
                # put them in anywhere, but we hope that sending them soon
                # after the fulltext will give good locality in the receiver
                ready_to_send[:0] = deferred.pop(version_id)
        assert len(deferred) == 0, \
            "Still have compressed child versions waiting to be sent"
        # XXX: The stream format is such that we cannot stream it - we have to
        # know the length of all the data a-priori.
        raw_datum = []
        result_version_list = []
        for (version_id, raw_data), \
            (version_id2, options, _, parents) in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 temp_version_list):
            assert version_id == version_id2, \
                'logic error, inconsistent results'
            raw_datum.append(raw_data)
            result_version_list.append(
                (version_id, options, len(raw_data), parents))
        # provide a callback to get data incrementally.
        pseudo_file = StringIO(''.join(raw_datum))
        def read(length):
            if length is None:
                return pseudo_file.read()
            else:
                return pseudo_file.read(length)
        return (self.get_format_signature(), result_version_list, read)
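
    # A sketch of the tuple returned above, assuming two versions were
    # requested from an annotated knit and 'rev-2' is a delta against
    # 'rev-1' (the ids and lengths are illustrative only):
    #
    #   ('knit-annotated',
    #    [('rev-1', ['fulltext'], 120, []),
    #     ('rev-2', ['line-delta'], 40, ['rev-1'])],
    #    read)
    #
    # read(None) returns all remaining bytes; read(n) returns the next n.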

    def _extract_blocks(self, version_id, source, target):
        if self._index.get_method(version_id) != 'line-delta':
            return None
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version."""
        self.check_not_reserved_id(version_id)
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        parents = self.get_parents(version_id)
        if len(parents):
            parent = parents[0]
        else:
            parent = None
        index_memo = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            new_content = self.factory.parse_fulltext(data, version_id)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            else:
                old_texts = []
            new_texts = new_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                             new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
        else:
            delta = self.factory.parse_line_delta(data, version_id)
            return parent, sha1, noeol, delta
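
    # A sketch of the value returned by get_delta, assuming the knit
    # line-delta hunk layout of (start, end, count, lines); ``kvf`` is a
    # hypothetical KnitVersionedFile instance, not part of this module:
    #
    #   parent, sha1, noeol, delta = kvf.get_delta('rev-2')
    #   # parent -> 'rev-1' (or None for a root version)
    #   # delta  -> [(0, 1, 1, [...]), ...] where the hunk lines are
    #   #           (origin, text) pairs for annotated knits and plain
    #   #           strings for plain knits.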

    def get_format_signature(self):
        """See VersionedFile.get_format_signature()."""
        if self.factory.annotated:
            annotated_part = "annotated"
        else:
            annotated_part = "plain"
        return "knit-%s" % (annotated_part,)

    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        graph_items = self._index.get_graph()
        return dict(graph_items)

    def insert_data_stream(self, (format, data_list, reader_callable)):
        """Insert knit records from a data stream into this knit.

        If a version in the stream is already present in this knit, it will
        not be inserted a second time. It will be checked for consistency
        with the stored version however, and may cause a KnitCorrupt error
        to be raised if the data in the stream disagrees with the already
        stored data.

        :seealso: get_data_stream
        """
        if format != self.get_format_signature():
            if 'knit' in debug.debug_flags:
                trace.mutter(
                    'incompatible format signature inserting to %r', self)
            source = self._knit_from_datastream(
                (format, data_list, reader_callable))
            self.join(source)
            return

        for version_id, options, length, parents in data_list:
            if self.has_version(version_id):
                # First check: the list of parents.
                my_parents = self.get_parents_with_ghosts(version_id)
                if tuple(my_parents) != tuple(parents):
                    # XXX: KnitCorrupt is not quite the right exception here.
                    raise KnitCorrupt(
                        self.filename,
                        'parents list %r from data stream does not match '
                        'already recorded parents %r for %s'
                        % (parents, my_parents, version_id))

                # Also check the SHA-1 of the fulltext this content will
                # produce.
                raw_data = reader_callable(length)
                my_fulltext_sha1 = self.get_sha1(version_id)
                df, rec = self._data._parse_record_header(version_id, raw_data)
                stream_fulltext_sha1 = rec[3]
                if my_fulltext_sha1 != stream_fulltext_sha1:
                    # Actually, we don't know if it's this knit that's corrupt,
                    # or the data stream we're trying to insert.
                    raise KnitCorrupt(
                        self.filename, 'sha-1 does not match %s' % version_id)
            else:
                if 'line-delta' in options:
                    # Make sure that this knit record is actually useful: a
                    # line-delta is no use unless we have its parent.
                    # Fetching from a broken repository with this problem
                    # shouldn't break the target repository.
                    #
                    # See https://bugs.launchpad.net/bzr/+bug/164443
                    if not self._index.has_version(parents[0]):
                        raise KnitCorrupt(
                            self.filename,
                            'line-delta from stream '
                            'for version %s '
                            'references '
                            'missing parent %s\n'
                            'Try running "bzr check" '
                            'on the source repository, and "bzr reconcile" '
                            'if necessary.' %
                            (version_id, parents[0]))
                self._add_raw_records(
                    [(version_id, options, parents, length)],
                    reader_callable(length))
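
    # A minimal usage sketch for the two stream methods above, assuming two
    # compatible knits named ``source`` and ``target`` (both names are
    # illustrative, not part of this module):
    #
    #   versions = source.versions()
    #   stream = source.get_data_stream(versions)
    #   target.insert_data_stream(stream)
    #
    # The stream tuple unpacks as (format, data_list, reader_callable);
    # records already present in ``target`` are verified rather than
    # inserted a second time.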

    def _knit_from_datastream(self, (format, data_list, reader_callable)):
        """Create a knit object from a data stream.

        This method exists to allow conversion of data streams that do not
        match the signature of this knit. Generally it will be slower and use
        more memory to use this method to insert data, but it will work.

        :seealso: get_data_stream for details on datastreams.
        :return: A knit versioned file which can be used to join the
            datastream into self.
        """
        if format == "knit-plain":
            factory = KnitPlainFactory()
        elif format == "knit-annotated":
            factory = KnitAnnotateFactory()
        else:
            raise errors.KnitDataStreamUnknown(format)
        index = _StreamIndex(data_list, self._index)
        access = _StreamAccess(reader_callable, index, self, factory)
        return KnitVersionedFile(self.filename, self.transport,
            factory=factory, index=index, access_method=access)

    def versions(self):
        """See VersionedFile.versions."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "versions scales with size of history")
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "has_version is a LBYL scenario")
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None,
                           left_matching_blocks=None):
        """Merge annotations for content. This is done by comparing
        the annotations based on changes to the text.
        """
        if left_matching_blocks is not None:
            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
        else:
            delta_seq = None
        if annotated:
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                if (parent_id == parents[0] and delta_seq is not None):
                    seq = delta_seq
                else:
                    seq = patiencediff.PatienceSequenceMatcher(
                        None, merge_content.text(), content.text())
                for i, j, n in seq.get_matching_blocks():
                    if n == 0:
                        continue
                    # this appears to copy (origin, text) pairs across to the
                    # new content for any line that matches the last-checked
                    # parent.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if delta_seq is None:
                reference_content = self._get_content(parents[0], parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = patiencediff.PatienceSequenceMatcher(
                    None, old_texts, new_texts)
            return self._make_line_delta(delta_seq, content)


class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knit parents; if not, do not record
            parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys; 1-tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        return set([
            node[1] for node in self._get_entries(version_ids)])
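
    # A sketch of the key convention used throughout this class: version ids
    # are wrapped into 1-tuples to form GraphIndex keys, and unwrapped on the
    # way back out (see _version_ids_to_keys and _keys_to_version_ids below):
    #
    #   'rev-1'  <->  ('rev-1',)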

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # don't ask for ghosts - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # don't examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        entries = self._get_entries(self._version_ids_to_keys(version_ids), True)
        for entry in entries:
            version_id = self._keys_to_version_ids((entry[1],))[0]
            if not self._parents:
                parents = ()
            else:
                parents = self._keys_to_version_ids(entry[3][0])
            if not self._deltas:
                compression_parent = None
            else:
                compression_parent_key = self._compression_parent(entry)
                if compression_parent_key:
                    compression_parent = self._keys_to_version_ids(
                        (compression_parent_key,))[0]
                else:
                    compression_parent = None
            noeol = (entry[2][0] == 'N')
            if compression_parent:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[version_id] = (self._node_to_position(entry),
                                  compression_parent, parents,
                                  (method, noeol))
        return result
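
    # A sketch of one returned mapping entry, under the assumption that
    # 'rev-2' is stored as a line-delta against 'rev-1' (ids and offsets
    # are illustrative only):
    #
    #   result['rev-2'] == ((index, 120, 40),        # index_memo
    #                       'rev-1',                  # compression_parent
    #                       ('rev-1',),               # parents
    #                       ('line-delta', False))    # (method, noeol)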

    def _compression_parent(self, an_entry):
        # return the key that an_entry is compressed against, or None
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        assert len(compression_parents) == 1
        return compression_parents[0]

    def _get_method(self, node):
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. Not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        return self._node_to_position(node)

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        return self._get_method(self._get_node(version_id))

    def _get_node(self, version_id):
        try:
            return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)
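
    # A sketch of the index value format decoded by _node_to_position and
    # written by add_versions below: one flag byte ('N' for no-eol, ' '
    # otherwise) followed by "<offset> <size>", e.g.:
    #
    #   'N0 120'   ->  (node_index, 0, 120)    # no-eol record
    #   ' 120 45'  ->  (node_index, 120, 45)   # record ending in a newline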

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(version_id)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions, random_id=False):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value, node_refs) != keys[key]:
                    raise KnitCorrupt(self, "inconsistent details in add_versions"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)
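
    # A sketch of the node_refs shapes produced above, assuming an index
    # with parents=True and a record whose sole parent is 'rev-1':
    #
    #   fulltext in a delta index:    ((('rev-1',),), ())
    #   line-delta in a delta index:  ((('rev-1',),), (('rev-1',),))
    #   any record, non-delta index:  ((('rev-1',),),)
    #
    # With parents=False the reference lists are dropped and node_refs is ().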

    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)


class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if not self._need_to_create:
            base = self._transport.append_bytes(self._filename, raw_data)
        else:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            base = 0
        result = []
        for size in sizes:
            result.append((None, base, size))
            base += size
        return result
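
    # A sketch of the memos returned above: appending two records of 10 and
    # 20 bytes to a .knit whose current length is 100 would yield
    #
    #   [(None, 100, 10), (None, 110, 20)]
    #
    # i.e. (index, pos, length) with index always None for this access method.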

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        try:
            return self._transport.get(self._filename)
        except NoSuchFile:
            pass
        return None

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data


class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data record.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)
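
    # A sketch of the first-pass grouping above: consecutive memos that share
    # an index are batched into a single readv request, so
    #
    #   [(idx_a, 0, 10), (idx_a, 10, 20), (idx_b, 0, 5)]
    #
    # becomes [(idx_a, [(0, 10), (10, 20)]), (idx_b, [(0, 5)])]. Only
    # adjacent runs are merged; interleaved indices start new groups.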

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, (transport, packname)):
        """Set a writer to use for adding data."""
        if index is not None:
            self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index


class _StreamAccess(object):
    """A Knit Access object that provides data from a datastream.

    It also provides a fallback that presents annotated data from a
    *backing* access object as unannotated data.

    This is triggered by an index_memo which is pointing to a different index
    than this was constructed with, and is used to allow extracting full
    unannotated texts for insertion into annotated knits.
    """

    def __init__(self, reader_callable, stream_index, backing_knit,
        orig_factory):
        """Create a _StreamAccess object.

        :param reader_callable: The reader_callable from the datastream.
            This is called to buffer all the data immediately, for
            random access.
        :param stream_index: The index the data stream this provides access to
            which will be present in native index_memo's.
        :param backing_knit: The knit object that will provide access to
            annotated texts which are not available in the stream, so as to
            create unannotated texts.
        :param orig_factory: The original content factory used to generate the
            stream. This is used for checking whether the thunk code for
            supporting _copy_texts will generate the correct form of data.
        """
        self.data = reader_callable(None)
        self.stream_index = stream_index
        self.backing_knit = backing_knit
        self.orig_factory = orig_factory

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (thunk_flag,
            index, start, end) memo for retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # use a generator for memory friendliness
        for thunk_flag, version_id, start, end in memos_for_retrieval:
            if version_id is self.stream_index:
                yield self.data[start:end]
                continue
            # we have been asked to thunk. This thunking only occurs when
            # we are obtaining plain texts from an annotated backing knit
            # so that _copy_texts will work.
            # We could improve performance here by scanning for where we need
            # to do this and using get_line_list, then interleaving the output
            # as desired. However, for now, this is sufficient.
            if self.orig_factory.__class__ != KnitPlainFactory:
                raise errors.KnitCorrupt(
                    self, 'Bad thunk request %r' % version_id)
            lines = self.backing_knit.get_lines(version_id)
            line_bytes = ''.join(lines)
            digest = sha_string(line_bytes)
            if lines:
                if lines[-1][-1] != '\n':
                    lines[-1] = lines[-1] + '\n'
                    line_bytes += '\n'
            orig_options = list(self.backing_knit._index.get_options(version_id))
            if 'fulltext' not in orig_options:
                if 'line-delta' not in orig_options:
                    raise errors.KnitCorrupt(self,
                        'Unknown compression method %r' % orig_options)
                orig_options.remove('line-delta')
                orig_options.append('fulltext')
            # We want plain data, because we expect to thunk only to allow text
            # extraction.
            size, bytes = self.backing_knit._data._record_to_data(version_id,
                digest, lines, line_bytes)
            yield bytes


class _StreamIndex(object):
    """A Knit Index object that uses the data map from a datastream."""

    def __init__(self, data_list, backing_index):
        """Create a _StreamIndex object.

        :param data_list: The data_list from the datastream.
        :param backing_index: The index which will supply values for nodes
            referenced outside of this stream.
        """
        self.data_list = data_list
        self.backing_index = backing_index
        self._by_version = {}
        pos = 0
        for key, options, length, parents in data_list:
            self._by_version[key] = options, (pos, pos + length), parents
            pos += length
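
    # A sketch of the offset bookkeeping above: records are laid out
    # back-to-back in the buffered stream, so a data_list of
    #
    #   [('rev-1', ['fulltext'], 100, []),
    #    ('rev-2', ['line-delta'], 40, ['rev-1'])]
    #
    # maps 'rev-1' to byte range (0, 100) and 'rev-2' to (100, 140).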

    def get_ancestry(self, versions, topo_sorted):
        """Get an ancestry list for versions."""
        if topo_sorted:
            # Not needed for basic joins
            raise NotImplementedError(self.get_ancestry)
        # get a graph of all the mentioned versions:
        # Little ugly - basically copied from KnitIndex, but don't want to
        # accidentally incorporate too much of that index's code.
        ancestry = set()
        pending = set(versions)
        cache = self._by_version
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][2] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in ancestry])
            ancestry.add(version)
        return list(ancestry)

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
                                       parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        for version_id in version_ids:
            try:
                method = self.get_method(version_id)
            except errors.RevisionNotPresent:
                # ghosts are omitted
                continue
            parent_ids = self.get_parents_with_ghosts(version_id)
            noeol = ('no-eol' in self.get_options(version_id))
            if method == 'fulltext':
                compression_parent = None
            else:
                compression_parent = parent_ids[0]
            index_memo = self.get_position(version_id)
            result[version_id] = (index_memo, compression_parent,
                                  parent_ids, (method, noeol))
        return result

    def get_method(self, version_id):
        """Return compression method of specified version."""
        try:
            options = self._by_version[version_id][0]
        except KeyError:
            # Strictly speaking this should check in the backing knit, but
            # until we have a test to discriminate, this will do.
            return self.backing_index.get_method(version_id)
        if 'fulltext' in options:
            return 'fulltext'
        elif 'line-delta' in options:
            return 'line-delta'
        else:
            raise errors.KnitIndexUnknownMethod(self, options)

    def get_options(self, version_id):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        try:
            return self._by_version[version_id][0]
        except KeyError:
            return self.backing_index.get_options(version_id)

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        try:
            return self._by_version[version_id][2]
        except KeyError:
            return self.backing_index.get_parents_with_ghosts(version_id)

    def get_position(self, version_id):
        """Return details needed to access the version.

        _StreamAccess has the data as a big array, so we return slice
        coordinates into that (as index_memo's are opaque outside the
        index and matching access class).

        :return: a tuple (thunk_flag, index, start, end). If thunk_flag is
            False, index will be self, otherwise it will be a version id.
        """
        try:
            start, end = self._by_version[version_id][1]
            return False, self, start, end
        except KeyError:
            # Signal to the access object to handle this from the backing knit.
            return (True, version_id, None, None)

    def get_versions(self):
        """Get all the versions in the stream."""
        return self._by_version.keys()

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        result = []
        for version in version_ids:
            try:
                result.append((version, self._by_version[version][2]))
            except KeyError:
                pass
        return result


class _KnitData(object):
    """Manage extraction of data from a KnitAccess, caching and decompressing.

    The KnitData class provides the logic for parsing and using knit records,
    making use of an access method for the low level read and write operations.
    """

    def __init__(self, access):
        """Create a KnitData object.

        :param access: The access method to use. Access methods such as
            _KnitAccess manage the insertion of raw records and the subsequent
            retrieval of the same.
        """
        self._access = access
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount


InterVersionedFile.register_optimiser(WeaveToKnit)


# Deprecated, use PatienceSequenceMatcher instead
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher


def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.
    """
    annotator = _KnitAnnotator(knit)
    return iter(annotator.annotate(revision_id))
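

# A sketch of how annotate_knit is used, assuming ``kvf`` is a hypothetical
# KnitVersionedFile holding the text history of one file:
#
#   for origin, line in annotate_knit(kvf, 'rev-2'):
#       print origin, line,
#
# Each yielded pair is (revision_id that introduced the line, line text),
# as assembled by _KnitAnnotator below.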


class _KnitAnnotator(object):
    """Build up the annotations for a text."""

    def __init__(self, knit):
        self._knit = knit

        # Content objects, differs from fulltexts because of how final newlines
        # are treated by knits. the content objects here will always have a
        # final newline
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that will
        # be annotated from it.
        self._pending_children = {}

        # Nodes which cannot be extracted
        self._ghosts = set()

        # Track how many children this node has, so we know if we need to keep
        # it
        self._annotate_children = {}
        self._compression_children = {}

        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        self._heads_provider = None

        self._nodes_to_keep_annotations = set()
        self._generations_until_keep = 100

    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value

    def _add_fulltext_content(self, revision_id, content_obj):
        self._fulltext_contents[revision_id] = content_obj
        # TODO: jam 20080305 It might be good to check the sha1digest here
        return content_obj.text()

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if (parent_id not in self._annotated_lines):
                # This parent is not yet annotated, so defer the child until
                # it is.
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
                          fulltext, revision_id, left_matching_blocks,
                          heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                and p not in self._nodes_to_keep_annotations):
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, revision_id):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate
        the fulltext.)

        :return: A list of (revision_id, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        """
        if revision_id in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([revision_id])
        records = []
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for rev_id, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[rev_id] = parents
                records.append((rev_id, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                                 if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(rev_id)
                if parents:
                    for parent in parents:
                        self._annotate_children.setdefault(parent,
                            []).append(rev_id)
                    num_gens = generation - kept_generation
                    if ((num_gens >= self._generations_until_keep)
                        and len(parents) > 1):
                        kept_generation = generation
                        self._nodes_to_keep_annotations.add(rev_id)

            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        # XXX: This should probably be a real exception, as it is a data
        #      inconsistency
        assert not self._ghosts.intersection(self._compression_children), \
            "We cannot have nodes which have a compression parent of a ghost."
        # Cleanout anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records

    def _annotate_records(self, records):
        """Build the annotations for the listed records."""
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._data.read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents and
            #       parent_ids; we should be able to do this without
            #       conflating the two.
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                assert compression_parent is None
                fulltext_content, delta = self._knit.factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                if compression_parent is not None:
                    comp_children = self._compression_children[compression_parent]
                    assert rev_id in comp_children
                    # If there is only 1 child, it is safe to reuse this
                    # content
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit.factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    fulltext_content = self._knit.factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                         left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues."""
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = head_cache
        return head_cache

    def annotate(self, revision_id):
        """Return the annotated fulltext at the given revision.

        :param revision_id: The revision id for this file
        """
        records = self._get_build_graph(revision_id)
        if revision_id in self._ghosts:
            raise errors.RevisionNotPresent(revision_id, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[revision_id]


try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data