~bzr-pqm/bzr/bzr.dev

Committer: Vincent Ladeuil
Date: 2007-11-04 15:24:27 UTC
mto: (2961.1.1 trunk)
mto: This revision was merged to the branch mainline in revision 2962.
Revision ID: v.ladeuil+lp@free.fr-20071104152427-p9k7e4toywa87wfc

Review feedback.

* doc/en/user-guide/authentication_conf.txt:
New file. Authentication configuration file documentation.

* doc/en/user-guide/configuration.txt:
Slight modifications, add authentication.conf reference.

* doc/en/mini-tutorial/index.txt:
Fix make docs warning.

* doc/developers/authentication-ring.txt:
Small cleanups noticed while writing
doc/en/user-guide/authentication_conf.txt.

* bzrlib/transport/http/_urllib.py:
(HttpTransport_urllib._perform): Use a dict() instead of {} syntax.

* bzrlib/tests/blackbox/test_whoami.py:
(TestWhoami.test_whoami_branch): Delete BZREMAIL related tests.

* bzrlib/config.py:
(Config.username): BZREMAIL support deleted; it has been obsolete for
more than a year.
(AuthenticationConfig.__init__): Review feedback: since keeping a
callback as an attribute is useless, call it now and keep the
filename itself as an attribute.
(AuthenticationConfig.get_credentials): Use a dict() instead of {}
syntax.

* NEWS:
Updated as per Martin's suggestion.

files added:
bzrlib/plugins/multiparent.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_escaped_store.py

bzrlib/xml6.py

doc/en/user-reference/index.txt

tools/win32/survey.txt

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.h

bzrlib/_walkdirs_win32.pyx

bzrlib/btree_index.py

bzrlib/chunk_writer.py

bzrlib/directory_service.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/lru_cache.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/push.py

bzrlib/readdir.h

bzrlib/repofmt/pack_repo.py

bzrlib/rules.py

bzrlib/smart/message.py

bzrlib/switch.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_add_fallback_repository.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/version_info_formats/format_custom.py

bzrlib/xml5.py

bzrlib/xml6.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

doc/developers/development-repo.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/tutorials

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/win32/run_script.py

files renamed:
bzrlib/help_topics/__init__.py => bzrlib/help_topics.py

bzrlib/plugins/launchpad/lp_directory.py => bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_directory.py => bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/http_server.py => bzrlib/tests/HttpServer.py

bzrlib/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

bzrlib/xml8.py => bzrlib/xml5.py

bzrlib/help_topics/en/authentication.txt => doc/en/user-guide/authentication_conf.txt

doc/en/tutorials/centralized_workflow.txt => doc/en/user-guide/centralized_workflow.txt

bzrlib/help_topics/en/configuration.txt => doc/en/user-guide/configuration.txt

bzrlib/help_topics/en/conflicts.txt => doc/en/user-guide/conflicts.txt

doc/en/tutorials/tutorial.txt => doc/en/user-guide/tutorial.txt

bzrlib/help_topics/en/hooks.txt => doc/en/user-reference/hooks.txt

files modified:
.bzrignore

Makefile

NEWS

README

bzr.ico

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fetch.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml_serializer.py

doc/default.css

doc/developers/HACKING.txt

doc/developers/authentication-ring.txt

doc/developers/bundles.txt

doc/developers/index.txt

doc/developers/performance-contributing.txt

doc/developers/repository.txt

doc/en/mini-tutorial/index.txt

doc/en/quick-reference/quick-start-summary.svg

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/index.txt

setup.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

debug,

diff,

graph as _mod_graph,

index as _mod_index,

lru_cache,

pack,

progress,

trace,

tsort,

tuned_gzip,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.osutils import (

100

105

contains_whitespace,

101

106

contains_linebreaks,

102

107

sha_string,

103

108

sha_strings,

104

split_lines,

105

)

106

from bzrlib.versionedfile import (

107

AbsentContentFactory,

108

adapter_registry,

109

ConstantMapper,

110

ContentFactory,

111

FulltextContentFactory,

112

VersionedFile,

113

VersionedFiles,

114

)

109

)

110

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

111

from bzrlib.tsort import topo_sort

112

import bzrlib.ui

113

import bzrlib.weave

114

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

115

116

117

# TODO: Split out code specific to this format into an associated object.

131

INDEX_SUFFIX = '.kndx'

132

133

134

class KnitAdapter(object):

135

"""Base class for knit record adaption."""

136

137

def __init__(self, basis_vf):

138

"""Create an adapter which accesses full texts from basis_vf.

139

140

:param basis_vf: A versioned file to access basis texts of deltas from.

141

May be None for adapters that do not need to access basis texts.

142

"""

143

self._data = KnitVersionedFiles(None, None)

144

self._annotate_factory = KnitAnnotateFactory()

145

self._plain_factory = KnitPlainFactory()

146

self._basis_vf = basis_vf

147

148

149

class FTAnnotatedToUnannotated(KnitAdapter):

150

"""An adapter from FT annotated knits to unannotated ones."""

151

152

def get_bytes(self, factory, annotated_compressed_bytes):

153

rec, contents = \

154

self._data._parse_record_unchecked(annotated_compressed_bytes)

155

content = self._annotate_factory.parse_fulltext(contents, rec[1])

156

size, bytes = self._data._record_to_data((rec[1],), rec[3], content.text())

157

return bytes

158

159

160

class DeltaAnnotatedToUnannotated(KnitAdapter):

161

"""An adapter for deltas from annotated to unannotated."""

162

163

def get_bytes(self, factory, annotated_compressed_bytes):

164

rec, contents = \

165

self._data._parse_record_unchecked(annotated_compressed_bytes)

166

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

167

plain=True)

168

contents = self._plain_factory.lower_line_delta(delta)

169

size, bytes = self._data._record_to_data((rec[1],), rec[3], contents)

170

return bytes

171

172

173

class FTAnnotatedToFullText(KnitAdapter):

174

"""An adapter from FT annotated knits to unannotated ones."""

175

176

def get_bytes(self, factory, annotated_compressed_bytes):

177

rec, contents = \

178

self._data._parse_record_unchecked(annotated_compressed_bytes)

179

content, delta = self._annotate_factory.parse_record(factory.key[-1],

180

contents, factory._build_details, None)

181

return ''.join(content.text())

182

183

184

class DeltaAnnotatedToFullText(KnitAdapter):

185

"""An adapter for deltas from annotated to unannotated."""

186

187

def get_bytes(self, factory, annotated_compressed_bytes):

188

rec, contents = \

189

self._data._parse_record_unchecked(annotated_compressed_bytes)

190

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

191

plain=True)

192

compression_parent = factory.parents[0]

193

basis_entry = self._basis_vf.get_record_stream(

194

[compression_parent], 'unordered', True).next()

195

if basis_entry.storage_kind == 'absent':

196

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

197

basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))

198

# Manually apply the delta because we have one annotated content and

199

# one plain.

200

basis_content = PlainKnitContent(basis_lines, compression_parent)

201

basis_content.apply_delta(delta, rec[1])

202

basis_content._should_strip_eol = factory._build_details[1]

203

return ''.join(basis_content.text())

204

205

206

class FTPlainToFullText(KnitAdapter):

207

"""An adapter from FT plain knits to unannotated ones."""

208

209

def get_bytes(self, factory, compressed_bytes):

210

rec, contents = \

211

self._data._parse_record_unchecked(compressed_bytes)

212

content, delta = self._plain_factory.parse_record(factory.key[-1],

213

contents, factory._build_details, None)

214

return ''.join(content.text())

215

216

217

class DeltaPlainToFullText(KnitAdapter):

218

"""An adapter for deltas from annotated to unannotated."""

219

220

def get_bytes(self, factory, compressed_bytes):

221

rec, contents = \

222

self._data._parse_record_unchecked(compressed_bytes)

223

delta = self._plain_factory.parse_line_delta(contents, rec[1])

224

compression_parent = factory.parents[0]

225

# XXX: string splitting overhead.

226

basis_entry = self._basis_vf.get_record_stream(

227

[compression_parent], 'unordered', True).next()

228

if basis_entry.storage_kind == 'absent':

229

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

230

basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))

231

basis_content = PlainKnitContent(basis_lines, compression_parent)

232

# Manually apply the delta because we have one annotated content and

233

# one plain.

234

content, _ = self._plain_factory.parse_record(rec[1], contents,

235

factory._build_details, basis_content)

236

return ''.join(content.text())

237

238

239

class KnitContentFactory(ContentFactory):

240

"""Content factory for streaming from knits.

241

242

:seealso ContentFactory:

243

"""

244

245

def __init__(self, key, parents, build_details, sha1, raw_record,

246

annotated, knit=None):

247

"""Create a KnitContentFactory for key.

248

249

:param key: The key.

250

:param parents: The parents.

251

:param build_details: The build details as returned from

252

get_build_details.

253

:param sha1: The sha1 expected from the full text of this object.

254

:param raw_record: The bytes of the knit data from disk.

255

:param annotated: True if the raw data is annotated.

256

"""

257

ContentFactory.__init__(self)

258

self.sha1 = sha1

259

self.key = key

260

self.parents = parents

261

if build_details[0] == 'line-delta':

262

kind = 'delta'

263

else:

264

kind = 'ft'

265

if annotated:

266

annotated_kind = 'annotated-'

267

else:

268

annotated_kind = ''

269

self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)

270

self._raw_record = raw_record

271

self._build_details = build_details

272

self._knit = knit

273

274

def get_bytes_as(self, storage_kind):

275

if storage_kind == self.storage_kind:

276

return self._raw_record

277

if storage_kind == 'fulltext' and self._knit is not None:

278

return self._knit.get_text(self.key[0])

279

else:

280

raise errors.UnavailableRepresentation(self.key, storage_kind,

281

self.storage_kind)

282

283

284

134

class KnitContent(object):

285

"""Content of a knit version to which deltas can be applied.

286

287

This is always stored in memory as a list of lines with \n at the end,

288

plus a flag saying if the final ending is really there or not, because that

289

corresponds to the on-disk knit representation.

290

"""

291

292

def __init__(self):

293

self._should_strip_eol = False

294

295

def apply_delta(self, delta, new_version_id):

296

"""Apply delta to this object to become new_version_id."""

297

raise NotImplementedError(self.apply_delta)

135

"""Content of a knit version to which deltas can be applied."""

136

137

def annotate(self):

138

"""Return a list of (origin, text) tuples."""

139

return list(self.annotate_iter())

298

140

299

141

def line_delta_iter(self, new_lines):

300

142

"""Generate line-based delta from this content to new_lines."""

341

183

"""Annotated content."""

342

184

343

185

def __init__(self, lines):

344

KnitContent.__init__(self)

345

186

self._lines = lines

346

187

347

def annotate(self):

348

"""Return a list of (origin, text) for each content line."""

349

lines = self._lines[:]

350

if self._should_strip_eol:

351

origin, last_line = lines[-1]

352

lines[-1] = (origin, last_line.rstrip('\n'))

353

return lines

188

def annotate_iter(self):

189

"""Yield tuples of (origin, text) for each content line."""

190

return iter(self._lines)

354

191

355

def apply_delta(self, delta, new_version_id):

356

"""Apply delta to this object to become new_version_id."""

357

offset = 0

358

lines = self._lines

359

for start, end, count, delta_lines in delta:

360

lines[offset+start:offset+end] = delta_lines

361

offset = offset + (start - end) + count

192

def strip_last_line_newline(self):

193

line = self._lines[-1][1].rstrip('\n')

194

self._lines[-1] = (self._lines[-1][0], line)

362

195

363

196

def text(self):

364

197

try:

365

lines = [text for origin, text in self._lines]

198

return [text for origin, text in self._lines]

366

199

except ValueError, e:

367

200

# most commonly (only?) caused by the internal form of the knit

368

201

# missing annotation information because of a bug - see thread

370

203

raise KnitCorrupt(self,

371

204

"line in annotated knit missing annotation information: %s"

372

205

% (e,))

373

if self._should_strip_eol:

374

lines[-1] = lines[-1].rstrip('\n')

375

return lines

376

206

377

207

def copy(self):

378

208

return AnnotatedKnitContent(self._lines[:])

387

217

"""

388

218

389

219

def __init__(self, lines, version_id):

390

KnitContent.__init__(self)

391

220

self._lines = lines

392

221

self._version_id = version_id

393

222

394

def annotate(self):

395

"""Return a list of (origin, text) for each content line."""

396

return [(self._version_id, line) for line in self._lines]

397

398

def apply_delta(self, delta, new_version_id):

399

"""Apply delta to this object to become new_version_id."""

400

offset = 0

401

lines = self._lines

402

for start, end, count, delta_lines in delta:

403

lines[offset+start:offset+end] = delta_lines

404

offset = offset + (start - end) + count

405

self._version_id = new_version_id

223

def annotate_iter(self):

224

"""Yield tuples of (origin, text) for each content line."""

225

for line in self._lines:

226

yield self._version_id, line

406

227

407

228

def copy(self):

408

229

return PlainKnitContent(self._lines[:], self._version_id)

409

230

231

def strip_last_line_newline(self):

232

self._lines[-1] = self._lines[-1].rstrip('\n')

233

410

234

def text(self):

411

lines = self._lines

412

if self._should_strip_eol:

413

lines = lines[:]

414

lines[-1] = lines[-1].rstrip('\n')

415

return lines

416

417

418

class _KnitFactory(object):
    """Base class for common Factory functions."""

    def parse_record(self, version_id, record, record_details,
                     base_content, copy_base_content=True):
        """Parse a record into a full content object.

        :param version_id: The official version id for this content
        :param record: The data returned by read_records_iter()
        :param record_details: Details about the record returned by
            get_build_details; unpacked here as (method, noeol).
        :param base_content: If get_build_details returns a
            compression_parent, you must pass a base_content here, else
            use None
        :param copy_base_content: When building from the base_content,
            choose whether to copy it and return a new object (True) or to
            modify it in place (False).
        :return: (content, delta) A Content object and possibly a line-delta,
            delta may be None
        """
        method, noeol = record_details
        if method == 'line-delta':
            if copy_base_content:
                content = base_content.copy()
            else:
                # The caller's object is mutated and handed back.
                content = base_content
            delta = self.parse_line_delta(record, version_id)
            content.apply_delta(delta, version_id)
        else:
            content = self.parse_fulltext(record, version_id)
            delta = None
        # Record on the content object whether the stored record was flagged
        # as having no trailing newline.
        content._should_strip_eol = noeol
        return (content, delta)

450

451

452

class KnitAnnotateFactory(_KnitFactory):

235

return self._lines

236

237

238

class KnitAnnotateFactory(object):

453

239

"""Factory for creating annotated Content objects."""

454

240

455

241

annotated = True

556

342

for origin, text in lines)

557

343

return out

558

344

559

def annotate(self, knit, key):
    """Return the annotated lines of key as a list of (origin, line).

    :param knit: Object whose _get_content(key) yields the content to
        annotate.
    :param key: The key to annotate. For a tuple key every origin is
        expanded to a full key by prepending key[:-1].
    :return: A list of (origin, line) pairs.
    """
    annotated = knit._get_content(key).annotate()
    if not isinstance(key, tuple):
        # XXX: This smells a bit.  Why would key ever be a non-tuple here?
        # Aren't keys defined to be tuples?  -- spiv 20080618
        return annotated
    # adjust for the fact that serialised annotations are only key suffixes
    # for this factory.
    prefix = key[:-1]
    return [(prefix + (origin,), line) for origin, line in annotated]

574

575

576

class KnitPlainFactory(_KnitFactory):

345

def annotate_iter(self, knit, version_id):

346

content = knit._get_content(version_id)

347

return content.annotate_iter()

348

349

350

class KnitPlainFactory(object):

577

351

"""Factory for creating plain Content objects."""

578

352

579

353

annotated = False

630

404

out.extend(lines)

631

405

return out

632

406

633

def annotate(self, knit, key):

634

annotator = _KnitAnnotator(knit)

635

return annotator.annotate(key)

636

637

638

639

def make_file_factory(annotated, mapper):

640

"""Create a factory for creating a file based KnitVersionedFiles.

641

642

This is only functional enough to run interface tests, it doesn't try to

643

provide a full pack environment.

644

645

:param annotated: knit annotations are wanted.

646

:param mapper: The mapper from keys to paths.

647

"""

648

def factory(transport):

649

index = _KndxIndex(transport, mapper, lambda:None, lambda:True, lambda:True)

650

access = _KnitKeyAccess(transport, mapper)

651

return KnitVersionedFiles(index, access, annotated=annotated)

652

return factory

653

654

655

def make_pack_factory(graph, delta, keylength):

656

"""Create a factory for creating a pack based VersionedFiles.

657

658

This is only functional enough to run interface tests, it doesn't try to

659

provide a full pack environment.

660

661

:param graph: Store a graph.

662

:param delta: Delta compress contents.

663

:param keylength: How long should keys be.

664

"""

665

def factory(transport):

666

parents = graph or delta

667

ref_length = 0

668

if graph:

669

ref_length += 1

407

def annotate_iter(self, knit, version_id):

408

return annotate_knit(knit, version_id)

409

410

411

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: Transport handed to KnitVersionedFile.
    :param relpath: Path of the knit, handed to KnitVersionedFile as its
        relpath.
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...).  The former positional call passed the
    # transport as the path, 'w' as the file mode and the factory *class* as
    # the access mode.  create=True is required because the constructor
    # otherwise only opens an existing knit.
    KnitVersionedFile(relpath, transport, access_mode='w',
                      factory=KnitPlainFactory(), create=True)

414

415

416

class KnitVersionedFile(VersionedFile):

417

"""Weave-like structure with faster random access.

418

419

A knit stores a number of texts and a summary of the relationships

420

between them. Texts are identified by a string version-id. Texts

421

are normally stored and retrieved as a series of lines, but can

422

also be passed as single strings.

423

424

Lines are stored with the trailing newline (if any) included, to

425

avoid special cases for files with no final newline. Lines are

426

composed of 8-bit characters, not unicode. The combination of

427

these approaches should mean any 'binary' file can be safely

428

stored and retrieved.

429

"""

430

431

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

432

factory=None, delta=True, create=False, create_parent_dir=False,

433

delay_create=False, dir_mode=None, index=None, access_method=None):

434

"""Construct a knit at location specified by relpath.

435

436

:param create: If not True, only open an existing knit.

437

:param create_parent_dir: If True, create the parent directory if

438

creating the file fails. (This is used for stores with

439

hash-prefixes that may not exist yet)

440

:param delay_create: The calling code is aware that the knit won't

441

actually be created until the first data is stored.

442

:param index: An index to use for the knit.

443

"""

444

if access_mode is None:

445

access_mode = 'w'

446

super(KnitVersionedFile, self).__init__(access_mode)

447

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

448

self.transport = transport

449

self.filename = relpath

450

self.factory = factory or KnitAnnotateFactory()

451

self.writable = (access_mode == 'w')

452

self.delta = delta

453

454

self._max_delta_chain = 200

455

456

if index is None:

457

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

458

access_mode, create=create, file_mode=file_mode,

459

create_parent_dir=create_parent_dir, delay_create=delay_create,

460

dir_mode=dir_mode)

461

else:

462

self._index = index

463

if access_method is None:

464

_access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,

465

((create and not len(self)) and delay_create), create_parent_dir)

466

else:

467

_access = access_method

468

if create and not len(self) and not delay_create:

469

_access.create()

470

self._data = _KnitData(_access)

471

472

def __repr__(self):

473

return '%s(%s)' % (self.__class__.__name__,

474

self.transport.abspath(self.filename))

475

476

def _check_should_delta(self, first_parents):

477

"""Iterate back through the parent listing, looking for a fulltext.

478

479

This is used when we want to decide whether to add a delta or a new

480

fulltext. It searches for _max_delta_chain parents. When it finds a

481

fulltext parent, it sees if the total size of the deltas leading up to

482

it is large enough to indicate that we want a new full text anyway.

483

484

Return True if we should create a new delta, False if we should use a

485

full text.

486

"""

487

delta_size = 0

488

fulltext_size = None

489

delta_parents = first_parents

490

for count in xrange(self._max_delta_chain):

491

parent = delta_parents[0]

492

method = self._index.get_method(parent)

493

index, pos, size = self._index.get_position(parent)

494

if method == 'fulltext':

495

fulltext_size = size

496

break

497

delta_size += size

498

delta_parents = self._index.get_parents(parent)

499

else:

500

# We couldn't find a fulltext, so we must create a new one

501

return False

502

503

return fulltext_size > delta_size

504

505

def _add_raw_records(self, records, data):

506

"""Add all the records 'records' with data pre-joined in 'data'.

507

508

:param records: A list of tuples(version_id, options, parents, size).

509

:param data: The data for the records. When it is written, the records

510

are adjusted to have pos pointing into data by the sum of

511

the preceding records sizes.

512

"""

513

# write all the data

514

raw_record_sizes = [record[3] for record in records]

515

positions = self._data.add_raw_records(raw_record_sizes, data)

516

offset = 0

517

index_entries = []

518

for (version_id, options, parents, size), access_memo in zip(

519

records, positions):

520

index_entries.append((version_id, options, access_memo, parents))

521

if self._data._do_cache:

522

self._data._cache[version_id] = data[offset:offset+size]

523

offset += size

524

self._index.add_versions(index_entries)

525

526

def enable_cache(self):

527

"""Start caching data for this knit"""

528

self._data.enable_cache()

529

530

def clear_cache(self):

531

"""Clear the data cache only."""

532

self._data.clear_cache()

533

534

def copy_to(self, name, transport):

535

"""See VersionedFile.copy_to()."""

536

# copy the current index to a temp index to avoid racing with local

537

# writes

538

transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',

539

self.transport.get(self._index._filename))

540

# copy the data file

541

f = self._data._open_file()

542

try:

543

transport.put_file(name + DATA_SUFFIX, f)

544

finally:

545

f.close()

546

# move the copied index into place

547

transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

548

549

def create_empty(self, name, transport, mode=None):

550

return KnitVersionedFile(name, transport, factory=self.factory,

551

delta=self.delta, create=True)

552

553

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.
    :raises RevisionNotPresent: If a required version is not in this knit.
    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    """
    if not isinstance(required_versions, set):
        required_versions = set(required_versions)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    version_list = [version_id for version_id in self.versions()
                    if version_id in required_versions]

    # create the list of version information for the result
    copy_queue_records = []
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        # Only the size is used; the unpack still insists on a 3-tuple.
        unused_index, unused_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
                                    parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    raw_records = self._data.read_records_iter_raw(copy_queue_records)
    for (version_id, raw_data), expected in izip(raw_records,
                                                 result_version_list):
        # Records must come back in the order they were requested.
        assert version_id == expected[0], 'logic error, inconsistent results'
        raw_datum.append(raw_data)
    pseudo_file = StringIO(''.join(raw_datum))

    def read(length):
        # length None means "everything that is left".
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

613

614

def _extract_blocks(self, version_id, source, target):

615

if self._index.get_method(version_id) != 'line-delta':

616

return None

617

parent, sha1, noeol, delta = self.get_delta(version_id)

618

return KnitContent.get_line_delta_blocks(delta, source, target)

619

620

def get_delta(self, version_id):

621

"""Get a delta for constructing version from some other version."""

622

self.check_not_reserved_id(version_id)

623

parents = self.get_parents(version_id)

624

if len(parents):

625

parent = parents[0]

626

else:

627

parent = None

628

index_memo = self._index.get_position(version_id)

629

data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]

630

noeol = 'no-eol' in self._index.get_options(version_id)

631

if 'fulltext' == self._index.get_method(version_id):

632

new_content = self.factory.parse_fulltext(data, version_id)

633

if parent is not None:

634

reference_content = self._get_content(parent)

635

old_texts = reference_content.text()

636

else:

637

old_texts = []

638

new_texts = new_content.text()

639

delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,

640

new_texts)

641

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

642

else:

643

delta = self.factory.parse_line_delta(data, version_id)

644

return parent, sha1, noeol, delta

645

646

def get_format_signature(self):

647

"""See VersionedFile.get_format_signature()."""

648

if self.factory.annotated:

649

annotated_part = "annotated"

650

else:

651

annotated_part = "plain"

652

return "knit-%s" % (annotated_part,)

653

654

def get_graph_with_ghosts(self):

655

"""See VersionedFile.get_graph_with_ghosts()."""

656

graph_items = self._index.get_graph()

657

return dict(graph_items)

658

659

def get_sha1(self, version_id):

660

return self.get_sha1s([version_id])[0]

661

662

def get_sha1s(self, version_ids):

663

"""See VersionedFile.get_sha1()."""

664

record_map = self._get_record_map(version_ids)

665

# record entry 2 is the 'digest'.

666

return [record_map[v][2] for v in version_ids]

667

668

@staticmethod

669

def get_suffixes():

670

"""See VersionedFile.get_suffixes()."""

671

return [DATA_SUFFIX, INDEX_SUFFIX]

672

673

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version counts as a ghost here when this knit does not have it, it is
    absent from the index cache, and some node of the index graph lists it
    as a parent.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # A parent can only match when it equals version_id, so the cache test
    # is the same for every candidate and is done once up front.
    if version_id in self._index._cache:
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        if version_id in parents:
            return True
    return False

686

687

def insert_data_stream(self, (format, data_list, reader_callable)):

688

"""Insert knit records from a data stream into this knit.

689

690

If a version in the stream is already present in this knit, it will not

691

be inserted a second time. It will be checked for consistency with the

692

stored version however, and may cause a KnitCorrupt error to be raised

693

if the data in the stream disagrees with the already stored data.

694

695

:seealso: get_data_stream

696

"""

697

if format != self.get_format_signature():

698

trace.mutter('incompatible format signature inserting to %r', self)

699

raise KnitDataStreamIncompatible(

700

format, self.get_format_signature())

701

702

for version_id, options, length, parents in data_list:

703

if self.has_version(version_id):

704

# First check: the list of parents.

705

my_parents = self.get_parents_with_ghosts(version_id)

706

if my_parents != parents:

707

# XXX: KnitCorrupt is not quite the right exception here.

708

raise KnitCorrupt(

709

self.filename,

710

'parents list %r from data stream does not match '

711

'already recorded parents %r for %s'

712

% (parents, my_parents, version_id))

713

714

# Also check the SHA-1 of the fulltext this content will

715

# produce.

716

raw_data = reader_callable(length)

717

my_fulltext_sha1 = self.get_sha1(version_id)

718

df, rec = self._data._parse_record_header(version_id, raw_data)

719

stream_fulltext_sha1 = rec[3]

720

if my_fulltext_sha1 != stream_fulltext_sha1:

721

# Actually, we don't know if it's this knit that's corrupt,

722

# or the data stream we're trying to insert.

723

raise KnitCorrupt(

724

self.filename, 'sha-1 does not match %s' % version_id)

725

else:

726

if 'line-delta' in options:

727

# Make sure that this knit record is actually useful: a

728

# line-delta is no use unless we have its parent.

729

# Fetching from a broken repository with this problem

730

# shouldn't break the target repository.

731

if not self._index.has_version(parents[0]):

732

raise KnitCorrupt(

733

self.filename,

734

'line-delta from stream references '

735

'missing parent %s' % parents[0])

736

self._add_raw_records(

737

[(version_id, options, parents, length)],

738

reader_callable(length))

739

740

def versions(self):

741

"""See VersionedFile.versions."""

742

if 'evil' in debug.debug_flags:

743

trace.mutter_callsite(2, "versions scales with size of history")

744

return self._index.get_versions()

745

746

def has_version(self, version_id):

747

"""See VersionedFile.has_version."""

748

if 'evil' in debug.debug_flags:

749

trace.mutter_callsite(2, "has_version is a LBYL scenario")

750

return self._index.has_version(version_id)

751

752

__contains__ = has_version

753

754

def _merge_annotations(self, content, parents, parent_texts={},

755

delta=None, annotated=None,

756

left_matching_blocks=None):

757

"""Merge annotations for content. This is done by comparing

758

the annotations based on changes to the text.

759

"""

760

if left_matching_blocks is not None:

761

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

762

else:

763

delta_seq = None

764

if annotated:

765

for parent_id in parents:

766

merge_content = self._get_content(parent_id, parent_texts)

767

if (parent_id == parents[0] and delta_seq is not None):

768

seq = delta_seq

769

else:

770

seq = patiencediff.PatienceSequenceMatcher(

771

None, merge_content.text(), content.text())

772

for i, j, n in seq.get_matching_blocks():

773

if n == 0:

774

continue

775

# this appears to copy (origin, text) pairs across to the

776

# new content for any line that matches the last-checked

777

# parent.

778

content._lines[j:j+n] = merge_content._lines[i:i+n]

670

779

if delta:

671

ref_length += 1

672

max_delta_chain = 200

673

else:

674

max_delta_chain = 0

675

graph_index = _mod_index.InMemoryGraphIndex(reference_lists=ref_length,

676

key_elements=keylength)

677

stream = transport.open_write_stream('newpack')

678

writer = pack.ContainerWriter(stream.write)

679

writer.begin()

680

index = _KnitGraphIndex(graph_index, lambda:True, parents=parents,

681

deltas=delta, add_callback=graph_index.add_nodes)

682

access = _DirectPackAccess({})

683

access.set_writer(writer, graph_index, (transport, 'newpack'))

684

result = KnitVersionedFiles(index, access,

685

max_delta_chain=max_delta_chain)

686

result.stream = stream

687

result.writer = writer

688

return result

689

return factory

690

691

692

def cleanup_pack_knit(versioned_files):

693

versioned_files.stream.close()

694

versioned_files.writer.end()

695

696

697

class KnitVersionedFiles(VersionedFiles):

698

"""Storage for many versioned files using knit compression.

699

700

Backend storage is managed by indices and data objects.

701

702

:ivar _index: A _KnitGraphIndex or similar that can describe the

703

parents, graph, compression and data location of entries in this

704

KnitVersionedFiles. Note that this is only the index for

705

*this* vfs; if there are fallbacks they must be queried separately.

706

"""

707

708

def __init__(self, index, data_access, max_delta_chain=200,

709

annotated=False):

710

"""Create a KnitVersionedFiles with index and data_access.

711

712

:param index: The index for the knit data.

713

:param data_access: The access object to store and retrieve knit

714

records.

715

:param max_delta_chain: The maximum number of deltas to permit during

716

insertion. Set to 0 to prohibit the use of deltas.

717

:param annotated: Set to True to cause annotations to be calculated and

718

stored during insertion.

719

"""

720

self._index = index

721

self._access = data_access

722

self._max_delta_chain = max_delta_chain

723

if annotated:

724

self._factory = KnitAnnotateFactory()

725

else:

726

self._factory = KnitPlainFactory()

727

self._fallback_vfs = []

728

729

def add_fallback_versioned_files(self, a_versioned_files):

730

"""Add a source of texts for texts not present in this knit.

731

732

:param a_versioned_files: A VersionedFiles object.

733

"""

734

self._fallback_vfs.append(a_versioned_files)

735

736

def add_lines(self, key, parents, lines, parent_texts=None,

737

left_matching_blocks=None, nostore_sha=None, random_id=False,

738

check_content=True):

739

"""See VersionedFiles.add_lines()."""

740

self._index._check_write_ok()

741

self._check_add(key, lines, random_id, check_content)

742

if parents is None:

743

# The caller might pass None if there is no graph data, but kndx

744

# indexes can't directly store that, so we give them

745

# an empty tuple instead.

746

parents = ()

747

return self._add(key, lines, parents,

780

if delta_seq is None:

781

reference_content = self._get_content(parents[0], parent_texts)

782

new_texts = content.text()

783

old_texts = reference_content.text()

784

delta_seq = patiencediff.PatienceSequenceMatcher(

785

None, old_texts, new_texts)

786

return self._make_line_delta(delta_seq, content)

787

788

def _make_line_delta(self, delta_seq, new_content):

789

"""Generate a line delta from delta_seq and new_content."""

790

diff_hunks = []

791

for op in delta_seq.get_opcodes():

792

if op[0] == 'equal':

793

continue

794

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

795

return diff_hunks

796

797

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, index_memo, next) is returned.
    method is the way referenced data should be applied.
    index_memo is the value the index's get_position() gave for the version.
    next is the build-parent of the version, or None for fulltexts.

    The walk follows each requested version back through its build-parents
    until a fulltext, or a version that was already recorded, is reached.
    """
    component_data = {}
    for version_id in version_ids:
        cursor = version_id
        while cursor is not None and cursor not in component_data:
            method = self._index.get_method(cursor)
            if method == 'fulltext':
                build_parent = None
            else:
                # Non-fulltext records are built on their first parent.
                build_parent = self.get_parents(cursor)[0]
            index_memo = self._index.get_position(cursor)
            component_data[cursor] = (method, index_memo, build_parent)
            cursor = build_parent
    return component_data

823

824

def _get_content(self, version_id, parent_texts=None):
    """Return the content object that makes up the specified version.

    :param version_id: The version to retrieve.
    :param parent_texts: Optional mapping of version id to an already
        built content object.  A non-None entry for version_id is returned
        directly instead of being rebuilt.
    :raises RevisionNotPresent: If parent_texts supplies content for a
        version that this knit does not have.
    """
    # None stands in for "no cached texts"; the former ``{}`` default was a
    # single dict object shared by every call.
    if parent_texts is not None:
        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
            return cached_version

    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

835

836

def _check_versions_present(self, version_ids):

837

"""Check that all specified versions are present."""

838

self._index.check_versions_present(version_ids)

839

840

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

841

nostore_sha, random_id, check_content):

842

"""See VersionedFile.add_lines_with_ghosts()."""

843

self._check_add(version_id, lines, random_id, check_content)

844

return self._add(version_id, lines, parents, self.delta,

845

parent_texts, None, nostore_sha, random_id)

846

847

def _add_lines(self, version_id, parents, lines, parent_texts,

848

left_matching_blocks, nostore_sha, random_id, check_content):

849

"""See VersionedFile.add_lines."""

850

self._check_add(version_id, lines, random_id, check_content)

851

self._check_versions_present(parents)

852

return self._add(version_id, lines[:], parents, self.delta,

748

853

parent_texts, left_matching_blocks, nostore_sha, random_id)

749

854

750

def _add(self, key, lines, parents, parent_texts,

855

def _check_add(self, version_id, lines, random_id, check_content):

856

"""check that version_id and lines are safe to add."""

857

if contains_whitespace(version_id):

858

raise InvalidRevisionId(version_id, self.filename)

859

self.check_not_reserved_id(version_id)

860

# Technically this could be avoided if we are happy to allow duplicate

861

# id insertion when other things than bzr core insert texts, but it

862

# seems useful for folk using the knit api directly to have some safety

863

# blanket that we can disable.

864

if not random_id and self.has_version(version_id):

865

raise RevisionAlreadyPresent(version_id, self.filename)

866

if check_content:

867

self._check_lines_not_unicode(lines)

868

self._check_lines_are_lines(lines)

869

870

def _add(self, version_id, lines, parents, delta, parent_texts,

751

871

left_matching_blocks, nostore_sha, random_id):

752

872

"""Add a set of lines on top of version specified by parents.

753

873

874

If delta is true, compress the text as a line-delta against

875

the first parent.

876

754

877

Any versions not present will be converted into ghosts.

755

878

"""

756

879

# first thing, if the content is something we don't need to store, find

763

886

present_parents = []

764

887

if parent_texts is None:

765

888

parent_texts = {}

766

# Do a single query to ascertain parent presence.

767

present_parent_map = self.get_parent_map(parents)

768

889

for parent in parents:

769

if parent in present_parent_map:

890

if self.has_version(parent):

770

891

present_parents.append(parent)

771

892

772

# Currently we can only compress against the left most present parent.

773

if (len(present_parents) == 0 or

774

present_parents[0] != parents[0]):

893

# can only compress against the left most present parent.

894

if (delta and

895

(len(present_parents) == 0 or

896

present_parents[0] != parents[0])):

775

897

delta = False

776

else:

777

# To speed the extract of texts the delta chain is limited

778

# to a fixed number of deltas. This should minimize both

779

# I/O and the time spent applying deltas.

780

delta = self._check_should_delta(present_parents[0])

781

898

782

899

text_length = len(line_bytes)

783

900

options = []

789

906

lines[-1] = lines[-1] + '\n'

790

907

line_bytes += '\n'

791

908

792

for element in key:

793

if type(element) != str:

794

raise TypeError("key contains non-strings: %r" % (key,))

795

# Knit hunks are still last-element only

796

version_id = key[-1]

797

content = self._factory.make(lines, version_id)

798

if 'no-eol' in options:

799

# Hint to the content object that its text() call should strip the

800

# EOL.

801

content._should_strip_eol = True

802

if delta or (self._factory.annotated and len(present_parents) > 0):

909

if delta:

910

# To speed the extract of texts the delta chain is limited

911

# to a fixed number of deltas. This should minimize both

912

# I/O and the time spent applying deltas.

913

delta = self._check_should_delta(present_parents)

914

915

assert isinstance(version_id, str)

916

content = self.factory.make(lines, version_id)

917

if delta or (self.factory.annotated and len(present_parents) > 0):

803

918

# Merge annotations from parent texts if needed.

804

919

delta_hunks = self._merge_annotations(content, present_parents,

805

parent_texts, delta, self._factory.annotated,

920

parent_texts, delta, self.factory.annotated,

806

921

left_matching_blocks)

807

922

808

923

if delta:

809

924

options.append('line-delta')

810

store_lines = self._factory.lower_line_delta(delta_hunks)

811

size, bytes = self._record_to_data(key, digest,

925

store_lines = self.factory.lower_line_delta(delta_hunks)

926

size, bytes = self._data._record_to_data(version_id, digest,

812

927

store_lines)

813

928

else:

814

929

options.append('fulltext')

815

930

# isinstance is slower and we have no hierarchy.

816

if self._factory.__class__ == KnitPlainFactory:

931

if self.factory.__class__ == KnitPlainFactory:

817

932

# Use the already joined bytes saving iteration time in

818

933

# _record_to_data.

819

size, bytes = self._record_to_data(key, digest,

934

size, bytes = self._data._record_to_data(version_id, digest,

820

935

lines, [line_bytes])

821

936

else:

822

937

# get mixed annotation + content and feed it into the

823

938

# serialiser.

824

store_lines = self._factory.lower_fulltext(content)

825

size, bytes = self._record_to_data(key, digest,

939

store_lines = self.factory.lower_fulltext(content)

940

size, bytes = self._data._record_to_data(version_id, digest,

826

941

store_lines)

827

942

828

access_memo = self._access.add_raw_records([(key, size)], bytes)[0]

829

self._index.add_records(

830

((key, options, access_memo, parents),),

943

access_memo = self._data.add_raw_records([size], bytes)[0]

944

self._index.add_versions(

945

((version_id, options, access_memo, parents),),

831

946

random_id=random_id)

832

947

return digest, text_length, content

833

948

834

def annotate(self, key):

835

"""See VersionedFiles.annotate."""

836

return self._factory.annotate(self, key)

837

838

949

def check(self, progress_bar=None):

839

"""See VersionedFiles.check()."""

840

# This doesn't actually test extraction of everything, but that will

841

# impact 'bzr check' substantially, and needs to be integrated with

842

# care. However, it does check for the obvious problem of a delta with

843

# no basis.

844

keys = self._index.keys()

845

parent_map = self.get_parent_map(keys)

846

for key in keys:

847

if self._index.get_method(key) != 'fulltext':

848

compression_parent = parent_map[key][0]

849

if compression_parent not in parent_map:

850

raise errors.KnitCorrupt(self,

851

"Missing basis parent %s for %s" % (

852

compression_parent, key))

853

for fallback_vfs in self._fallback_vfs:

854

fallback_vfs.check()

855

856

def _check_add(self, key, lines, random_id, check_content):
    """Check that the version id of ``key`` and ``lines`` are safe to add.

    :param key: The key being added; only its last element (the version
        id) is validated.
    :param lines: The text lines about to be stored.
    :param random_id: Not consulted by this check.
    :param check_content: When true, ``lines`` are validated as well.
    :raises InvalidRevisionId: If the version id contains whitespace.
    """
    revision_part = key[-1]
    if contains_whitespace(revision_part):
        raise InvalidRevisionId(revision_part, self)
    self.check_not_reserved_id(revision_part)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

869

870

def _check_header(self, key, line):

871

rec = self._split_header(line)

872

self._check_header_version(rec, key[-1])

873

return rec

874

875

def _check_header_version(self, rec, version_id):

876

"""Checks the header version on original format knit records.

950

"""See VersionedFile.check()."""

951

952

def _clone_text(self, new_version_id, old_version_id, parents):

953

"""See VersionedFile.clone_text()."""

954

# FIXME RBC 20060228 make fast by only inserting an index with null

955

# delta.

956

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

957

958

def get_lines(self, version_id):

959

"""See VersionedFile.get_lines()."""

960

return self.get_line_list([version_id])[0]

961

962

def _get_record_map(self, version_ids):

963

"""Produce a dictionary of knit records.

877

964

878

These have the last component of the key embedded in the record.

879

"""

880

if rec[1] != version_id:

881

raise KnitCorrupt(self,

882

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

883

884

def _check_should_delta(self, parent):

885

"""Iterate back through the parent listing, looking for a fulltext.

886

887

This is used when we want to decide whether to add a delta or a new

888

fulltext. It searches for _max_delta_chain parents. When it finds a

889

fulltext parent, it sees if the total size of the deltas leading up to

890

it is large enough to indicate that we want a new full text anyway.

891

892

Return True if we should create a new delta, False if we should use a

893

full text.

894

"""

895

delta_size = 0

896

fulltext_size = None

897

for count in xrange(self._max_delta_chain):

898

# XXX: Collapse these two queries:

899

try:

900

# Note that this only looks in the index of this particular

901

# KnitVersionedFiles, not in the fallbacks. This ensures that

902

# we won't store a delta spanning physical repository

903

# boundaries.

904

method = self._index.get_method(parent)

905

except RevisionNotPresent:

906

# Some basis is not locally present: always delta

907

return False

908

index, pos, size = self._index.get_position(parent)

909

if method == 'fulltext':

910

fulltext_size = size

911

break

912

delta_size += size

913

# We don't explicitly check for presence because this is in an

914

# inner loop, and if it's missing it'll fail anyhow.

915

# TODO: This should be asking for compression parent, not graph

916

# parent.

917

parent = self._index.get_parent_map([parent])[parent][0]

918

else:

919

# We couldn't find a fulltext, so we must create a new one

920

return False

921

# Simple heuristic - if the total I/O wold be greater as a delta than

922

# the originally installed fulltext, we create a new fulltext.

923

return fulltext_size > delta_size

924

925

def _build_details_to_components(self, build_details):

926

"""Convert a build_details tuple to a position tuple."""

927

# record_details, access_memo, compression_parent

928

return build_details[3], build_details[0], build_details[1]

929

930

def _get_components_positions(self, keys, allow_missing=False):

931

"""Produce a map of position data for the components of keys.

932

933

This data is intended to be used for retrieving the knit records.

934

935

A dict of key to (record_details, index_memo, next, parents) is

936

returned.

937

method is the way referenced data should be applied.

938

index_memo is the handle to pass to the data access to actually get the

939

data

940

next is the build-parent of the version, or None for fulltexts.

941

parents is the version_ids of the parents of this version

942

943

:param allow_missing: If True do not raise an error on a missing component,

944

just ignore it.

945

"""

946

component_data = {}

947

pending_components = keys

948

while pending_components:

949

build_details = self._index.get_build_details(pending_components)

950

current_components = set(pending_components)

951

pending_components = set()

952

for key, details in build_details.iteritems():

953

(index_memo, compression_parent, parents,

954

record_details) = details

955

method = record_details[0]

956

if compression_parent is not None:

957

pending_components.add(compression_parent)

958

component_data[key] = self._build_details_to_components(details)

959

missing = current_components.difference(build_details)

960

if missing and not allow_missing:

961

raise errors.RevisionNotPresent(missing.pop(), self)

962

return component_data

963

964

def _get_content(self, key, parent_texts={}):

965

"""Returns a content object that makes up the specified

966

version."""

967

cached_version = parent_texts.get(key, None)

968

if cached_version is not None:

969

# Ensure the cache dict is valid.

970

if not self.get_parent_map([key]):

971

raise RevisionNotPresent(key, self)

972

return cached_version

973

text_map, contents_map = self._get_content_maps([key])

974

return contents_map[key]

975

976

def _get_content_maps(self, keys, nonlocal_keys=None):

965

The keys are version_ids, the values are tuples of (method, content,

966

digest, next).

967

method is the way the content should be applied.

968

content is a KnitContent object.

969

digest is the SHA1 digest of this version id after all steps are done

970

next is the build-parent of the version, i.e. the leftmost ancestor.

971

If the method is fulltext, next will be None.

972

"""

973

position_map = self._get_components_positions(version_ids)

974

# c = component_id, m = method, i_m = index_memo, n = next

975

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

976

record_map = {}

977

for component_id, content, digest in \

978

self._data.read_records_iter(records):

979

method, index_memo, next = position_map[component_id]

980

record_map[component_id] = method, content, digest, next

981

982

return record_map

983

984

def get_text(self, version_id):

985

"""See VersionedFile.get_text"""

986

return self.get_texts([version_id])[0]

987

988

def get_texts(self, version_ids):

989

return [''.join(l) for l in self.get_line_list(version_ids)]

990

991

def get_line_list(self, version_ids):

992

"""Return the texts of listed versions as a list of strings."""

993

for version_id in version_ids:

994

self.check_not_reserved_id(version_id)

995

text_map, content_map = self._get_content_maps(version_ids)

996

return [text_map[v] for v in version_ids]

997

998

_get_lf_split_line_list = get_line_list

999

1000

def _get_content_maps(self, version_ids):

977

1001

"""Produce maps of text and KnitContents

978

1002

979

:param keys: The keys to produce content maps for.

980

:param nonlocal_keys: An iterable of keys(possibly intersecting keys)

981

which are known to not be in this knit, but rather in one of the

982

fallback knits.

983

1003

:return: (text_map, content_map) where text_map contains the texts for

984

the requested versions and content_map contains the KnitContents.

1004

the requested versions and content_map contains the KnitContents.

1005

Both dicts take version_ids as their keys.

985

1006

"""

986

# FUTURE: This function could be improved for the 'extract many' case

987

# by tracking each component and only doing the copy when the number of

988

# children than need to apply delta's to it is > 1 or it is part of the

989

# final output.

990

keys = list(keys)

991

multiple_versions = len(keys) != 1

992

record_map = self._get_record_map(keys, allow_missing=True)

1007

record_map = self._get_record_map(version_ids)

993

1008

994

1009

text_map = {}

995

1010

content_map = {}

996

1011

final_content = {}

997

if nonlocal_keys is None:

998

nonlocal_keys = set()

999

else:

1000

nonlocal_keys = frozenset(nonlocal_keys)

1001

missing_keys = set(nonlocal_keys)

1002

for source in self._fallback_vfs:

1003

if not missing_keys:

1004

break

1005

for record in source.get_record_stream(missing_keys,

1006

'unordered', True):

1007

if record.storage_kind == 'absent':

1008

continue

1009

missing_keys.remove(record.key)

1010

lines = split_lines(record.get_bytes_as('fulltext'))

1011

text_map[record.key] = lines

1012

content_map[record.key] = PlainKnitContent(lines, record.key)

1013

if record.key in keys:

1014

final_content[record.key] = content_map[record.key]

1015

for key in keys:

1016

if key in nonlocal_keys:

1017

# already handled

1018

continue

1012

for version_id in version_ids:

1019

1013

components = []

1020

cursor = key

1014

cursor = version_id

1021

1015

while cursor is not None:

1022

try:

1023

record, record_details, digest, next = record_map[cursor]

1024

except KeyError:

1025

raise RevisionNotPresent(cursor, self)

1026

components.append((cursor, record, record_details, digest))

1027

cursor = next

1016

method, data, digest, next = record_map[cursor]

1017

components.append((cursor, method, data, digest))

1028

1018

if cursor in content_map:

1029

# no need to plan further back

1030

components.append((cursor, None, None, None))

1031

1019

break

1020

cursor = next

1032

1021

1033

1022

content = None

1034

for (component_id, record, record_details,

1035

digest) in reversed(components):

1023

for component_id, method, data, digest in reversed(components):

1036

1024

if component_id in content_map:

1037

1025

content = content_map[component_id]

1038

1026

else:

1039

content, delta = self._factory.parse_record(key[-1],

1040

record, record_details, content,

1041

copy_base_content=multiple_versions)

1042

if multiple_versions:

1043

content_map[component_id] = content

1027

if method == 'fulltext':

1028

assert content is None

1029

content = self.factory.parse_fulltext(data, version_id)

1030

elif method == 'line-delta':

1031

delta = self.factory.parse_line_delta(data, version_id)

1032

content = content.copy()

1033

content._lines = self._apply_delta(content._lines,

1034

delta)

1035

content_map[component_id] = content

1044

1036

1045

final_content[key] = content

1037

if 'no-eol' in self._index.get_options(version_id):

1038

content = content.copy()

1039

content.strip_last_line_newline()

1040

final_content[version_id] = content

1046

1041

1047

1042

# digest here is the digest from the last applied component.

1048

1043

text = content.text()

1049

1044

actual_sha = sha_strings(text)

1050

1045

if actual_sha != digest:

1051

raise KnitCorrupt(self,

1046

raise KnitCorrupt(self.filename,

1052

1047

'\n sha-1 %s'

1053

1048

'\n of reconstructed text does not match'

1054

1049

'\n expected %s'

1055

1050

'\n for version %s' %

1056

(actual_sha, digest, key))

1057

text_map[key] = text

1051

(actual_sha, digest, version_id))

1052

text_map[version_id] = text

1058

1053

return text_map, final_content

1059

1054

1060

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    Only the combined mapping from _get_parent_map_with_sources is
    returned; the per-source breakdown is dropped.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    map_and_sources = self._get_parent_map_with_sources(keys)
    return map_and_sources[0]

1068

1069

def _get_parent_map_with_sources(self, keys):

1070

"""Get a map of the parents of keys.

1071

1072

:param keys: The keys to look up parents for.

1073

:return: A tuple. The first element is a mapping from keys to parents.

1074

Absent keys are absent from the mapping. The second element is a

1075

list with the locations each key was found in. The first element

1076

is the in-this-knit parents, the second the first fallback source,

1077

and so on.

1078

"""

1079

result = {}

1080

sources = [self._index] + self._fallback_vfs

1081

source_results = []

1082

missing = set(keys)

1083

for source in sources:

1084

if not missing:

1085

break

1086

new_result = source.get_parent_map(missing)

1087

source_results.append(new_result)

1088

result.update(new_result)

1089

missing.difference_update(set(new_result))

1090

return result, source_results

1091

1092

def _get_record_map(self, keys, allow_missing=False):

1093

"""Produce a dictionary of knit records.

1094

1095

:return: {key:(record, record_details, digest, next)}

1096

record

1097

data returned from read_records

1098

record_details

1099

opaque information to pass to parse_record

1100

digest

1101

SHA1 digest of the full text after all steps are done

1102

1103

build-parent of the version, i.e. the leftmost ancestor.

1104

Will be None if the record is not a delta.

1105

:param keys: The keys to build a map for

1106

:param allow_missing: If some records are missing, rather than

1107

error, just return the data that could be generated.

1108

"""

1109

position_map = self._get_components_positions(keys,

1110

allow_missing=allow_missing)

1111

# key = component_id, r = record_details, i_m = index_memo, n = next

1112

records = [(key, i_m) for key, (r, i_m, n)

1113

in position_map.iteritems()]

1114

record_map = {}

1115

for key, record, digest in \

1116

self._read_records_iter(records):

1117

(record_details, index_memo, next) = position_map[key]

1118

record_map[key] = record, record_details, digest, next

1119

return record_map

1120

1121

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    This is a generator.  Keys that no source reports parents for are
    yielded first as AbsentContentFactory objects.  With
    include_delta_closure set, every present key is then yielded as a
    FulltextContentFactory; otherwise keys found in this knit are yielded
    as KnitContentFactory objects built from the raw records, and keys
    found in a fallback are streamed from that fallback's own
    get_record_stream.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.  If the index has no graph, 'unordered' is used
        regardless of the value passed.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    :raises AssertionError: If ordering is neither 'unordered' nor
        'topological' (and the index has a graph).
    """
    # keys might be a generator
    keys = set(keys)
    if not keys:
        return
    if not self._index.has_graph:
        # Cannot topological order when no graph has been stored.
        ordering = 'unordered'
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
            for key, details in build_details.iteritems())
    # NOTE(review): this value is recomputed from global_map further down
    # before it is ever read.
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                # Not in this knit's index at all.
                needed_from_fallback.add(key)
                continue
            result = True
            # Walk compression parents until a fulltext (None) is reached,
            # a previously classified key is met, or a basis is missing.
            while chain[-1] is not None:
                if chain[-1] in reconstructable_keys:
                    result = reconstructable_keys[chain[-1]]
                    break
                else:
                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
            # The last chain element is the terminator (None, a known key,
            # or the missing basis), so it is not recorded here.
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = result
            if not result:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering == 'topological':
        # Global topological sort
        present_keys = tsort.topo_sort(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            # Find the per-source map that reported this key.
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            # Start a new group whenever the source changes, so the
            # topological order is preserved across sources.
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            source_keys.append((parent_map, []))
            for key in parent_map:
                present_keys.append(key)
                source_keys[-1][1].append(key)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        text_map, _ = self._get_content_maps(present_keys,
            needed_from_fallback - absent_keys)
        for key in present_keys:
            yield FulltextContentFactory(key, global_map[key], None,
                ''.join(text_map[key]))
    else:
        # Note: this loop rebinds ``keys`` to each group's key list.
        for source, keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in keys]
                for key, raw_data, sha1 in self._read_records_iter_raw(records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, sha1, raw_data, self._factory.annotated, None)
            else:
                # parent_maps[0] belongs to this knit, so fallback N's map
                # sits at position N + 1.
                vf = self._fallback_vfs[parent_maps.index(source) - 1]
                for record in vf.get_record_stream(keys, ordering,
                    include_delta_closure):
                    yield record

1239

1240

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Digests are taken from this knit's own records first; anything still
    outstanding is requested from each fallback in turn.

    :param keys: The keys to fetch digests for.
    :return: A dict mapping each key that was found to its digest.  Keys
        found nowhere are left out.
    """
    outstanding = set(keys)
    local_records = self._get_record_map(outstanding, allow_missing=True)
    # The record map may also hold components that were not asked for, so
    # keep only the requested keys.  record entry 2 is the 'digest'.
    sha1s = dict((key, details[2])
                 for key, details in local_records.items()
                 if key in outstanding)
    outstanding.difference_update(set(sha1s))
    for fallback in self._fallback_vfs:
        if not outstanding:
            break
        found = fallback.get_sha1s(outstanding)
        sha1s.update(found)
        outstanding.difference_update(set(found))
    return sha1s

1258

1259

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records whose storage kind is a knit type this object can store, or
    cheaply convert, have their bytes written through the access object
    and an index entry added.  A delta's index entry is held back until
    its basis parent is present.  'fulltext' records, and records that
    have to be adapted to fulltext, are added with add_lines.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: If the stream contains an 'absent' record,
        or if a delta's basis parent has still not been seen once the
        stream is exhausted.
    :seealso VersionedFiles.get_record_stream:
    """
    def get_adapter(adapter_key):
        # Adapters are created on first use and cached in ``adapters``.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    if self._factory.annotated:
        # self is annotated, we need annotated knits to use directly.
        annotated = "annotated-"
        convertibles = []
    else:
        # self is not annotated, but we can strip annotations cheaply.
        annotated = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if self._max_delta_chain:
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set()
    if self._max_delta_chain:
        native_types.add("knit-%sdelta-gz" % annotated)
    native_types.add("knit-%sft-gz" % annotated)
    knit_types = native_types.union(convertibles)
    adapters = {}
    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer all because generating
    # annotations may require access to some of the new records. However we
    # can't generate annotations from new deltas until their basis parent
    # is present anyway, so we get away with not needing an index that
    # includes the new keys.
    # key = basis_parent, value = index entry to add
    buffered_index_entries = {}
    for record in stream:
        parents = record.parents
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise RevisionNotPresent([record.key], self)
        if record.storage_kind in knit_types:
            if record.storage_kind not in native_types:
                # Convertible kind: prefer a delta adapter and fall back
                # to a fulltext adapter when the lookup raises KeyError.
                try:
                    adapter_key = (record.storage_kind, "knit-delta-gz")
                    adapter = get_adapter(adapter_key)
                except KeyError:
                    adapter_key = (record.storage_kind, "knit-ft-gz")
                    adapter = get_adapter(adapter_key)
                bytes = adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind))
            else:
                bytes = record.get_bytes_as(record.storage_kind)
            # Index options: the first build detail, plus 'no-eol' when
            # the second build detail is true.
            options = [record._build_details[0]]
            if record._build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by in the kndx index support raising on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(bytes))], bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            buffered = False
            if 'fulltext' not in options:
                # A delta: its first parent is used as the basis.
                basis_parent = parents[0]
                # Note that pack backed knits don't need to buffer here
                # because they buffer all writes to the transaction level,
                # but we don't expose that difference at the index level. If
                # the query here has sufficient cost to show up in
                # profiling we should do that.
                if basis_parent not in self.get_parent_map([basis_parent]):
                    pending = buffered_index_entries.setdefault(
                        basis_parent, [])
                    pending.append(index_entry)
                    buffered = True
            if not buffered:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'fulltext':
            self.add_lines(record.key, parents,
                split_lines(record.get_bytes_as('fulltext')))
        else:
            # Any other kind is adapted to a fulltext first.
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            lines = split_lines(adapter.get_bytes(
                record, record.get_bytes_as(record.storage_kind)))
            try:
                self.add_lines(record.key, parents, lines)
            except errors.RevisionAlreadyPresent:
                # Already stored: nothing to do.
                pass
        # Add any records whose basis parent is now available.
        added_keys = [record.key]
        while added_keys:
            key = added_keys.pop(0)
            if key in buffered_index_entries:
                index_entries = buffered_index_entries[key]
                self._index.add_records(index_entries)
                # The entries just added may be the basis of other
                # buffered entries, so queue their keys as well.
                added_keys.extend(
                    [index_entry[0] for index_entry in index_entries])
                del buffered_index_entries[key]
    # If there were any deltas which had a missing basis parent, error.
    if buffered_index_entries:
        raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
            self)

1371

1372

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

1373

"""Iterate over the lines in the versioned files from keys.

1374

1375

This may return lines from other keys. Each item the returned

1376

iterator yields is a tuple of a line and a text version that that line

1377

is present in (not introduced in).

1378

1379

Ordering of results is in whatever order is most suitable for the

1380

underlying storage format.

1381

1382

If a progress bar is supplied, it may be used to indicate progress.

1383

The caller is responsible for cleaning up progress bars (because this

1384

is an iterator).

1385

1386

NOTES:

1387

* Lines are normalised by the underlying store: they will all have \n

1388

terminators.

1389

* Lines are returned in arbitrary order.

1390

1391

:return: An iterator over (line, key).

1392

"""

1055

@staticmethod

1056

def _apply_delta(lines, delta):

1057

"""Apply delta to lines."""

1058

lines = list(lines)

1059

offset = 0

1060

for start, end, count, delta_lines in delta:

1061

lines[offset+start:offset+end] = delta_lines

1062

offset = offset + (start - end) + count

1063

return lines

1064

1065

def iter_lines_added_or_present_in_versions(self, version_ids=None,

1066

pb=None):

1067

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

1068

if version_ids is None:

1069

version_ids = self.versions()

1393

1070

if pb is None:

1394

1071

pb = progress.DummyProgress()

1395

keys = set(keys)

1396

total = len(keys)

1397

1072

# we don't care about inclusions, the caller cares.

1398

1073

# but we need to setup a list of records to visit.

1399

# we need key, position, length

1400

key_records = []

1401

build_details = self._index.get_build_details(keys)

1402

for key, details in build_details.iteritems():

1403

if key in keys:

1404

key_records.append((key, details[0]))

1405

keys.remove(key)

1406

records_iter = enumerate(self._read_records_iter(key_records))

1407

for (key_idx, (key, data, sha_value)) in records_iter:

1408

pb.update('Walking content.', key_idx, total)

1409

compression_parent = build_details[key][1]

1410

if compression_parent is None:

1411

# fulltext

1412

line_iterator = self._factory.get_fulltext_content(data)

1074

# we need version_id, position, length

1075

version_id_records = []

1076

requested_versions = set(version_ids)

1077

# filter for available versions

1078

for version_id in requested_versions:

1079

if not self.has_version(version_id):

1080

raise RevisionNotPresent(version_id, self.filename)

1081

# get a in-component-order queue:

1082

for version_id in self.versions():

1083

if version_id in requested_versions:

1084

index_memo = self._index.get_position(version_id)

1085

version_id_records.append((version_id, index_memo))

1086

1087

total = len(version_id_records)

1088

for version_idx, (version_id, data, sha_value) in \

1089

enumerate(self._data.read_records_iter(version_id_records)):

1090

pb.update('Walking content.', version_idx, total)

1091

method = self._index.get_method(version_id)

1092

1093

assert method in ('fulltext', 'line-delta')

1094

if method == 'fulltext':

1095

line_iterator = self.factory.get_fulltext_content(data)

1413

1096

else:

1414

# Delta

1415

line_iterator = self._factory.get_linedelta_content(data)

1416

# XXX: It might be more efficient to yield (key,

1417

# line_iterator) in the future. However for now, this is a simpler

1418

# change to integrate into the rest of the codebase. RBC 20071110

1097

line_iterator = self.factory.get_linedelta_content(data)

1419

1098

for line in line_iterator:

1420

yield line, key

1421

for source in self._fallback_vfs:

1422

if not keys:

1423

break

1424

source_keys = set()

1425

for line, key in source.iter_lines_added_or_present_in_keys(keys):

1426

source_keys.add(key)

1427

yield line, key

1428

keys.difference_update(source_keys)

1429

if keys:

1430

raise RevisionNotPresent(keys, self.filename)

1099

yield line

1100

1431

1101

pb.update('Walking content.', total, total)

1432

1433

def _make_line_delta(self, delta_seq, new_content):

1434

"""Generate a line delta from delta_seq and new_content."""

1435

diff_hunks = []

1436

for op in delta_seq.get_opcodes():

1437

if op[0] == 'equal':

1438

continue

1439

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1440

return diff_hunks

1441

1442

def _merge_annotations(self, content, parents, parent_texts={},

1443

delta=None, annotated=None,

1444

left_matching_blocks=None):

1445

"""Merge annotations for content and generate deltas.

1446

1447

This is done by comparing the annotations based on changes to the text

1448

and generating a delta on the resulting full texts. If annotations are

1449

not being created then a simple delta is created.

1450

"""

1451

if left_matching_blocks is not None:

1452

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1453

else:

1454

delta_seq = None

1455

if annotated:

1456

for parent_key in parents:

1457

merge_content = self._get_content(parent_key, parent_texts)

1458

if (parent_key == parents[0] and delta_seq is not None):

1459

seq = delta_seq

1460

else:

1461

seq = patiencediff.PatienceSequenceMatcher(

1462

None, merge_content.text(), content.text())

1463

for i, j, n in seq.get_matching_blocks():

1464

if n == 0:

1465

continue

1466

# this copies (origin, text) pairs across to the new

1467

# content for any line that matches the last-checked

1468

# parent.

1469

content._lines[j:j+n] = merge_content._lines[i:i+n]

1470

# XXX: Robert says the following block is a workaround for a

1471

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1472

if content._lines and content._lines[-1][1][-1] != '\n':

1473

# The copied annotation was from a line without a trailing EOL,

1474

# reinstate one for the content object, to ensure correct

1475

# serialization.

1476

line = content._lines[-1][1] + '\n'

1477

content._lines[-1] = (content._lines[-1][0], line)

1478

if delta:

1479

if delta_seq is None:

1480

reference_content = self._get_content(parents[0], parent_texts)

1481

new_texts = content.text()

1482

old_texts = reference_content.text()

1483

delta_seq = patiencediff.PatienceSequenceMatcher(

1484

None, old_texts, new_texts)

1485

return self._make_line_delta(delta_seq, content)

1486

1487

def _parse_record(self, version_id, data):

1488

"""Parse an original format knit record.

1489

1490

These have the last element of the key only present in the stored data.

1491

"""

1492

rec, record_contents = self._parse_record_unchecked(data)

1493

self._check_header_version(rec, version_id)

1494

return record_contents, rec[3]

1495

1496

def _parse_record_header(self, key, raw_data):

1497

"""Parse a record header for consistency.

1498

1499

:return: the header and the decompressor stream.

1500

as (stream, header_record)

1501

"""

1502

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1503

try:

1504

# Current serialise

1505

rec = self._check_header(key, df.readline())

1506

except Exception, e:

1507

raise KnitCorrupt(self,

1508

"While reading {%s} got %s(%s)"

1509

% (key, e.__class__.__name__, str(e)))

1510

return df, rec

1511

1512

def _parse_record_unchecked(self, data):

1513

# profiling notes:

1514

# 4168 calls in 2880 217 internal

1515

# 4168 calls to _parse_record_header in 2121

1516

# 4168 calls to readlines in 330

1517

df = tuned_gzip.GzipFile(mode='rb', fileobj=StringIO(data))

1518

try:

1519

record_contents = df.readlines()

1520

except Exception, e:

1521

raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %

1522

(data, e.__class__.__name__, str(e)))

1523

header = record_contents.pop(0)

1524

rec = self._split_header(header)

1525

last_line = record_contents.pop()

1526

if len(record_contents) != int(rec[2]):

1527

raise KnitCorrupt(self,

1528

'incorrect number of lines %s != %s'

1529

' for version {%s} %s'

1530

% (len(record_contents), int(rec[2]),

1531

rec[1], record_contents))

1532

if last_line != 'end %s\n' % rec[1]:

1533

raise KnitCorrupt(self,

1534

'unexpected version end line %r, wanted %r'

1535

% (last_line, rec[1]))

1536

df.close()

1537

return rec, record_contents

1538

1539

def _read_records_iter(self, records):

1540

"""Read text records from data file and yield result.

1541

1542

The result will be returned in whatever is the fastest to read.

1543

Not by the order requested. Also, multiple requests for the same

1544

record will only yield 1 response.

1545

:param records: A list of (key, access_memo) entries

1546

:return: Yields (key, contents, digest) in the order

1547

read, not the order requested

1548

"""

1549

if not records:

1550

return

1551

1552

# XXX: This smells wrong, IO may not be getting ordered right.

1553

needed_records = sorted(set(records), key=operator.itemgetter(1))

1554

if not needed_records:

1555

return

1556

1557

# The transport optimizes the fetching as well

1558

# (ie, reads continuous ranges.)

1559

raw_data = self._access.get_raw_records(

1560

[index_memo for key, index_memo in needed_records])

1561

1562

for (key, index_memo), data in \

1563

izip(iter(needed_records), raw_data):

1564

content, digest = self._parse_record(key[-1], data)

1565

yield key, content, digest

1566

1567

def _read_records_iter_raw(self, records):

1568

"""Read text records from data file and yield raw data.

1569

1570

This unpacks enough of the text record to validate the id is

1571

as expected but thats all.

1572

1573

Each item the iterator yields is (key, bytes, sha1_of_full_text).

1574

"""

1575

# setup an iterator of the external records:

1576

# uses readv so nice and fast we hope.

1577

if len(records):

1578

# grab the disk data needed.

1579

needed_offsets = [index_memo for key, index_memo

1580

in records]

1581

raw_records = self._access.get_raw_records(needed_offsets)

1582

1583

for key, index_memo in records:

1584

data = raw_records.next()

1585

# validate the header (note that we can only use the suffix in

1586

# current knit records).

1587

df, rec = self._parse_record_header(key, data)

1588

df.close()

1589

yield key, data, rec[3]

1590

1591

def _record_to_data(self, key, digest, lines, dense_lines=None):

1592

"""Convert key, digest, lines into a raw data block.

1593

1594

:param key: The key of the record. Currently keys are always serialised

1595

using just the trailing component.

1596

:param dense_lines: The bytes of lines but in a denser form. For

1597

instance, if lines is a list of 1000 bytestrings each ending in \n,

1598

dense_lines may be a list with one line in it, containing all the

1599

1000's lines and their \n's. Using dense_lines if it is already

1600

known is a win because the string join to create bytes in this

1601

function spends less time resizing the final string.

1602

:return: (len, a StringIO instance with the raw data ready to read.)

1603

"""

1604

# Note: using a string copy here increases memory pressure with e.g.

1605

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

1606

# when doing the initial commit of a mozilla tree. RBC 20070921

1607

bytes = ''.join(chain(

1608

["version %s %d %s\n" % (key[-1],

1609

len(lines),

1610

digest)],

1611

dense_lines or lines,

1612

["end %s\n" % key[-1]]))

1613

if type(bytes) != str:

1614

raise AssertionError(

1615

'data must be plain bytes was %s' % type(bytes))

1616

if lines and lines[-1][-1] != '\n':

1617

raise ValueError('corrupt lines value %r' % lines)

1618

compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)

1619

return len(compressed_bytes), compressed_bytes

1620

1621

def _split_header(self, line):

1622

rec = line.split()

1623

if len(rec) != 4:

1624

raise KnitCorrupt(self,

1625

'unexpected number of elements in record header')

1626

return rec

1627

1628

def keys(self):
    """See VersionedFiles.keys."""
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    # Union of the keys held by our own index and by every fallback.
    found = set()
    for source in [self._index] + self._fallback_vfs:
        found.update(source.keys())
    return found

1637

1638

1639

1640

class _KndxIndex(object):

1641

"""Manages knit index files

1642

1643

The index is kept in memory and read on startup, to enable

1102

1103

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    Delegates directly to the index.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    parents_iter = self._index.iter_parents(version_ids)
    return parents_iter

1113

1114

def num_versions(self):

1115

"""See VersionedFile.num_versions()."""

1116

return self._index.num_versions()

1117

1118

__len__ = num_versions

1119

1120

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter."""
    # The knit factory does the work; it is handed this knit as well.
    annotator = self.factory
    return annotator.annotate_iter(self, version_id)

1123

1124

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: if the index raises KeyError for
        version_id.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    try:
        parents = self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1133

1134

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: if the index raises KeyError for
        version_id.
    """
    try:
        parents = self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)
    return parents

1140

1141

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry."""
    # A bare version id is accepted as shorthand for a one-element list.
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry(versions, topo_sorted)
    return []

1148

1149

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts."""
    # A bare version id is accepted as shorthand for a one-element list.
    if isinstance(versions, basestring):
        versions = [versions]
    if versions:
        return self._index.get_ancestry_with_ghosts(versions)
    return []

1156

1157

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge."""
    # Unsorted ancestries are enough: they are only used as sets.
    b_ancestry = set(self.get_ancestry(ver_b, topo_sorted=False))
    a_ancestry = set(self.get_ancestry(ver_a, topo_sorted=False))
    a_annotated = self.annotate(ver_a)
    b_annotated = self.annotate(ver_b)
    return merge._plan_annotate_merge(a_annotated, b_annotated,
                                      a_ancestry, b_ancestry)

1165

1166

1167

class _KnitComponentFile(object):

1168

"""One of the files used to implement a knit database"""

1169

1170

def __init__(self, transport, filename, mode, file_mode=None,

1171

create_parent_dir=False, dir_mode=None):

1172

self._transport = transport

1173

self._filename = filename

1174

self._mode = mode

1175

self._file_mode = file_mode

1176

self._dir_mode = dir_mode

1177

self._create_parent_dir = create_parent_dir

1178

self._need_to_create = False

1179

1180

def _full_path(self):

1181

"""Return the full path to this file."""

1182

return self._transport.base + self._filename

1183

1184

def check_header(self, fp):

1185

line = fp.readline()

1186

if line == '':

1187

# An empty file can actually be treated as though the file doesn't

1188

# exist yet.

1189

raise errors.NoSuchFile(self._full_path())

1190

if line != self.HEADER:

1191

raise KnitHeaderError(badline=line,

1192

filename=self._transport.abspath(self._filename))

1193

1194

def __repr__(self):

1195

return '%s(%s)' % (self.__class__.__name__, self._filename)

1196

1197

1198

class _KnitIndex(_KnitComponentFile):

1199

"""Manages knit index file.

1200

1201

The index is already kept in memory and read on startup, to enable

1644

1202

fast lookups of revision information. The cursor of the index

1645

1203

file is always pointing to the end, making it easy to append

1646

1204

entries.

1688

1246

to ensure that records always start on new lines even if the last write was

1689

1247

interrupted. As a result its normal for the last line in the index to be

1690

1248

missing a trailing newline. One can be added with no harmful effects.

1691

1692

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

1693

where prefix is e.g. the (fileid,) for .texts instances or () for

1694

constant-mapped things like .revisions, and the old state is

1695

tuple(cache_dict, history_vector). This is used to prevent having an

1696

ABI change with the C extension that reads .kndx files.

1697

1249

"""

1698

1250

1699

1251

HEADER = "# bzr knit index 8\n"

1700

1252

1701

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):

1702

"""Create a _KndxIndex on transport using mapper."""

1703

self._transport = transport

1704

self._mapper = mapper

1705

self._get_scope = get_scope

1706

self._allow_writes = allow_writes

1707

self._is_locked = is_locked

1708

self._reset_cache()

1709

self.has_graph = True

1710

1711

def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file they map to and each file is
    written once, in sorted path order. If anything fails while a file is
    being processed, the in-memory state for that file's prefix is put
    back as it was and the error is re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    """
    by_path = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        index_path = self._mapper.map(key) + '.kndx'
        by_path.setdefault(index_path, (prefix, []))[1].append(record)
    for index_path in sorted(by_path):
        prefix, path_records = by_path[index_path]
        self._load_prefixes([prefix])
        new_lines = []
        # Snapshot so a failure part-way through can be undone.
        saved_history = self._kndx_cache[prefix][1][:]
        saved_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in path_records:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                compressed = self._dictionary_compress(parents)
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size, compressed)
                if type(line) != str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                new_lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(saved_history):
                self._transport.append_bytes(index_path,
                                             ''.join(new_lines))
            else:
                # Nothing recorded for this prefix before: write the file
                # from scratch, header included.
                self._init_index(index_path, new_lines)
        except:
            # If any problems happen, restore the original values and
            # re-raise
            self._kndx_cache[prefix] = (saved_cache, saved_history)
            raise

1754

1755

def _cache_key(self, key, options, pos, size, parent_keys):

1253

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1254

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1255

1256

def _cache_version(self, version_id, options, pos, size, parents):

1756

1257

"""Cache a version record in the history array and index cache.

1757

1258

1758

1259

This is inlined into _load_data for performance. KEEP IN SYNC.

1759

1260

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1760

1261

indexes).

1761

1262

"""

1762

prefix = key[:-1]

1763

version_id = key[-1]

1764

# last-element only for compatibilty with the C load_data.

1765

parents = tuple(parent[-1] for parent in parent_keys)

1766

for parent in parent_keys:

1767

if parent[:-1] != prefix:

1768

raise ValueError("mismatched prefixes for %r, %r" % (

1769

key, parent_keys))

1770

cache, history = self._kndx_cache[prefix]

1771

1263

# only want the _history index to reference the 1st index entry

1772

1264

# for version_id

1773

if version_id not in cache:

1774

index = len(history)

1775

history.append(version_id)

1265

if version_id not in self._cache:

1266

index = len(self._history)

1267

self._history.append(version_id)

1776

1268

else:

1777

index = cache[version_id][5]

1778

cache[version_id] = (version_id,

1269

index = self._cache[version_id][5]

1270

self._cache[version_id] = (version_id,

1779

1271

options,

1780

1272

pos,

1781

1273

size,

1782

1274

parents,

1783

1275

index)

1784

1276

1785

def check_header(self, fp):
    """Read the first line of fp and check it against self.HEADER.

    :raises errors.NoSuchFile: if fp is empty.
    :raises KnitHeaderError: if the first line is not self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    elif first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)

1793

1794

def _check_read(self):

1795

if not self._is_locked():

1796

raise errors.ObjectNotLocked(self)

1797

if self._get_scope() != self._scope:

1798

self._reset_cache()

1799

1800

def _check_write_ok(self):

1801

"""Assert if not writes are permitted."""

1802

if not self._is_locked():

1803

raise errors.ObjectNotLocked(self)

1804

if self._get_scope() != self._scope:

1805

self._reset_cache()

1806

if self._mode != 'w':

1807

raise errors.ReadOnlyObjectDirtiedError(self)

1808

1809

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys. It is materialised once, so one-shot
        iterators are handled correctly.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked more than once below (by get_parent_map and by the
    # loop), so take a copy rather than exhausting a one-shot iterable.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # A delta is stored against the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

1845

1846

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: if neither option is present.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

1855

1856

def get_options(self, key):

1857

"""Return a list representing options.

1858

1859

e.g. ['foo', 'bar']

1860

"""

1861

prefix, suffix = self._split_key(key)

1862

self._load_prefixes([prefix])

1277

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1278

create_parent_dir=False, delay_create=False, dir_mode=None):

1279

_KnitComponentFile.__init__(self, transport, filename, mode,

1280

file_mode=file_mode,

1281

create_parent_dir=create_parent_dir,

1282

dir_mode=dir_mode)

1283

self._cache = {}

1284

# position in _history is the 'official' index for a revision

1285

# but the values may have come from a newer entry.

1286

# so - wc -l of a knit index is != the number of unique names

1287

# in the knit.

1288

self._history = []

1863

1289

try:

1864

return self._kndx_cache[prefix][0][suffix][1]

1865

except KeyError:

1866

raise RevisionNotPresent(key, self)

1867

1868

def get_parent_map(self, keys):

1869

"""Get a map of the parents of keys.

1870

1871

:param keys: The keys to look up parents for.

1872

:return: A mapping from keys to parents. Absent keys are absent from

1873

the mapping.

1290

fp = self._transport.get(self._filename)

1291

try:

1292

# _load_data may raise NoSuchFile if the target knit is

1293

# completely empty.

1294

_load_data(self, fp)

1295

finally:

1296

fp.close()

1297

except NoSuchFile:

1298

if mode != 'w' or not create:

1299

raise

1300

elif delay_create:

1301

self._need_to_create = True

1302

else:

1303

self._transport.put_bytes_non_atomic(

1304

self._filename, self.HEADER, mode=self._file_mode)

1305

1306

def get_graph(self):
    """Return a list of the node:parents lists from this knit index."""
    graph = []
    for version_id, entry in self._cache.iteritems():
        # Field 4 of a cache entry holds the parents.
        graph.append((version_id, entry[4]))
    return graph

1309

1310

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    Parents that are not in the index (ghosts) are left out.

    :raises RevisionNotPresent: if a requested version is not in the
        index.
    """
    # get a graph of all the mentioned versions:
    entries = self._cache
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        try:
            # trim ghosts
            known_parents = [p for p in entries[version][4]
                             if p in entries]
        except KeyError:
            raise RevisionNotPresent(version, self._filename)
        # queue whatever has not been completed yet
        todo.update(p for p in known_parents if p not in graph)
        graph[version] = known_parents
    if topo_sorted:
        return topo_sort(graph.items())
    return graph.keys()

1329

1330

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Versions missing from the index are kept in the result as
    parentless nodes.
    """
    # get a graph of all the mentioned versions:
    self.check_versions_present(versions)
    entries = self._cache
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        if version not in entries:
            # ghost, fake it
            graph[version] = []
            continue
        parents = entries[version][4]
        # if not completed
        todo.update([p for p in parents if p not in graph])
        graph[version] = parents
    return topo_sort(graph.items())

1349

1350

def iter_parents(self, version_ids):

1351

"""Iterate through the parents for many version ids.

1352

1353

:param version_ids: An iterable yielding version_ids.

1354

:return: An iterator that yields (version_id, parents). Requested

1355

version_ids not present in the versioned file are simply skipped.

1356

The order is undefined, allowing for different optimisations in

1357

the underlying implementation.

1874

1358

"""

1875

# Parse what we need to up front, this potentially trades off I/O

1876

# locality (.kndx and .knit in the same block group for the same file

1877

# id) for less checking in inner loops.

1878

prefixes = set(key[:-1] for key in keys)

1879

self._load_prefixes(prefixes)

1880

result = {}

1881

for key in keys:

1882

prefix = key[:-1]

1359

for version_id in version_ids:

1883

1360

try:

1884

suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]

1361

yield version_id, tuple(self.get_parents(version_id))

1885

1362

except KeyError:

1886

1363

pass

1887

else:

1888

result[key] = tuple(prefix + (suffix,) for

1889

suffix in suffix_parents)

1890

return result

1891

1892

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    prefix_cache = self._kndx_cache[prefix][0]
    entry = prefix_cache[suffix]
    position, size = entry[2], entry[3]
    return key, position, size

1902

1903

def _init_index(self, path, extra_lines=[]):

1904

"""Initialize an index."""

1905

sio = StringIO()

1906

sio.write(self.HEADER)

1907

sio.writelines(extra_lines)

1908

sio.seek(0)

1909

self._transport.put_file_non_atomic(path, sio,

1910

create_parent_dir=True)

1911

# self._create_parent_dir)

1912

# mode=self._file_mode,

1913

# dir_mode=self._dir_mode)

1914

1915

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) == ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension from every file on the transport and unmap
        # what is left; duplicates collapse in the set.
        relpaths = set()
        for quoted_relpath in self._transport.iter_files_recursive():
            relpaths.add(os.path.splitext(quoted_relpath)[0])
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    found = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        found.update(prefix + (suffix,) for suffix in history)
    return found

1936

1937

def _load_prefixes(self, prefixes):

1938

"""Load the indices for prefixes."""

1939

self._check_read()

1940

for prefix in prefixes:

1941

if prefix not in self._kndx_cache:

1942

# the load_data interface writes to these variables.

1943

self._cache = {}

1944

self._history = []

1945

self._filename = prefix

1946

try:

1947

path = self._mapper.map(prefix) + '.kndx'

1948

fp = self._transport.get(path)

1949

try:

1950

# _load_data may raise NoSuchFile if the target knit is

1951

# completely empty.

1952

_load_data(self, fp)

1953

finally:

1954

fp.close()

1955

self._kndx_cache[prefix] = (self._cache, self._history)

1956

del self._cache

1957

del self._filename

1958

del self._history

1959

except NoSuchFile:

1960

self._kndx_cache[prefix] = ({}, [])

1961

if type(self._mapper) == ConstantMapper:

1962

# preserve behaviour for revisions.kndx etc.

1963

self._init_index(path)

1964

del self._cache

1965

del self._filename

1966

del self._history

1967

1968

def _partition_keys(self, keys):

1969

"""Turn keys into a dict of prefix:suffix_list."""

1970

result = {}

1971

for key in keys:

1972

prefix_keys = result.setdefault(key[:-1], [])

1973

prefix_keys.append(key[-1])

1974

return result

1975

1976

def _dictionary_compress(self, keys):

1977

"""Dictionary compress keys.

1978

1979

:param keys: The keys to generate references to.

1980

:return: A string representation of keys. keys which are present are

1981

dictionary compressed, and others are emitted as fulltext with a

1982

'.' prefix.

1983

"""

1984

if not keys:

1985

return ''

1364

1365

def num_versions(self):

1366

return len(self._history)

1367

1368

__len__ = num_versions

1369

1370

def get_versions(self):
    """Get all the versions in the file. not topologically sorted.

    The internal history list itself is returned, not a copy.
    """
    history = self._history
    return history

1373

1374

def _version_list_to_index(self, versions):

1986

1375

result_list = []

1987

prefix = keys[0][:-1]

1988

cache = self._kndx_cache[prefix][0]

1989

for key in keys:

1990

if key[:-1] != prefix:

1991

# kndx indices cannot refer across partitioned storage.

1992

raise ValueError("mismatched prefixes for %r" % keys)

1993

if key[-1] in cache:

1376

cache = self._cache

1377

for version in versions:

1378

if version in cache:

1994

1379

# -- inlined lookup() --

1995

result_list.append(str(cache[key[-1]][5]))

1380

result_list.append(str(cache[version][5]))

1996

1381

# -- end lookup () --

1997

1382

else:

1998

result_list.append('.' + key[-1])

1383

result_list.append('.' + version)

1999

1384

return ' '.join(result_list)

2000

1385

2001

def _reset_cache(self):

2002

# Possibly this should be a LRU cache. A dictionary from key_prefix to

2003

# (cache_dict, history_vector) for parsed kndx files.

2004

self._kndx_cache = {}

2005

self._scope = self._get_scope()

2006

allow_writes = self._allow_writes()

2007

if allow_writes:

2008

self._mode = 'w'

1386

def add_version(self, version_id, options, index_memo, parents):
    """Add a version record to the index.

    Convenience wrapper that hands a single record to add_versions.
    """
    record = (version_id, options, index_memo, parents)
    self.add_versions((record,))

1389

1390

def add_versions(self, versions, random_id=False):
    """Add multiple versions to the index.

    If anything fails, the in-memory history and cache are put back as
    they were before the call and the error is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, (index, pos, size), parents).
                     The index element of the access memo is ignored.
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises AssertionError: if a formatted index line is not a plain str.
    """
    lines = []
    # Snapshot so a failure part-way through can be undone.
    orig_history = self._history[:]
    orig_cache = self._cache.copy()

    try:
        for version_id, options, (index, pos, size), parents in versions:
            line = "\n%s %s %s %s %s :" % (version_id,
                                           ','.join(options),
                                           pos,
                                           size,
                                           self._version_list_to_index(parents))
            if not isinstance(line, str):
                # Raised explicitly rather than via an assert statement so
                # the check is not stripped when running under -O.
                raise AssertionError(
                    'content must be utf-8 encoded: %r' % (line,))
            lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if not self._need_to_create:
            self._transport.append_bytes(self._filename, ''.join(lines))
        else:
            # The file does not exist yet: write it with the header first.
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = orig_history
        self._cache = orig_cache
        raise

1430

1431

def has_version(self, version_id):
    """True if the version is in the index."""
    known = self._cache
    return version_id in known

1434

1435

def get_position(self, version_id):
    """Return details needed to access the version.

    .kndx indices do not support split-out data, so return None for the
    index field.

    :return: a tuple (None, data position, size) to hand to the access
        logic to get the record.
    """
    entry = self._cache[version_id]
    position, size = entry[2], entry[3]
    return None, position, size

1446

1447

def get_method(self, version_id):

1448

"""Return compression method of specified version."""

1449

try:

1450

options = self._cache[version_id][1]

1451

except KeyError:

1452

raise RevisionNotPresent(version_id, self._filename)

1453

if 'fulltext' in options:

1454

return 'fulltext'

2009

1455

else:

2010

self._mode = 'r'

2011

2012

def _split_key(self, key):

2013

"""Split key into a prefix and suffix."""

2014

return key[:-1], key[-1]

2015

2016

2017

class _KnitGraphIndex(object):

2018

"""A KnitVersionedFiles index layered on GraphIndex."""

2019

2020

def __init__(self, graph_index, is_locked, deltas=False, parents=True,

2021

add_callback=None):

1456

if 'line-delta' not in options:

1457

raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1458

return 'line-delta'

1459

1460

def get_options(self, version_id):
    """Return the options stored in the cache for version_id.

    The value is returned exactly as it was cached (field 1 of the cache
    entry).
    """
    entry = self._cache[version_id]
    return entry[1]

1466

1467

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts."""
    present = []
    for parent in self._cache[version_id][4]:
        # Parents without a cache entry of their own are ghosts.
        if parent in self._cache:
            present.append(parent)
    return present

1471

1472

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts."""
    entry = self._cache[version_id]
    # Field 4 holds every recorded parent, present or not.
    return entry[4]

1475

1476

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: for the first version id that is not in
        the cache.
    """
    known = self._cache
    for candidate in version_ids:
        if candidate in known:
            continue
        raise RevisionNotPresent(candidate, self._filename)

1482

1483

1484

class KnitGraphIndex(object):

1485

"""A knit index that builds on GraphIndex."""

1486

1487

def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):

2022

1488

"""Construct a KnitGraphIndex on a graph_index.

2023

1489

2024

1490

:param graph_index: An implementation of bzrlib.index.GraphIndex.

2025

:param is_locked: A callback to check whether the object should answer

2026

queries.

2027

1491

:param deltas: Allow delta-compressed records.

2028

:param parents: If True, record knits parents, if not do not record

2029

parents.

2030

1492

:param add_callback: If not None, allow additions to the index and call

2031

1493

this callback with a list of added GraphIndex nodes:

2032

1494

[(node, value, node_refs), ...]

2033

:param is_locked: A callback, returns True if the index is locked and

2034

thus usable.

1495

:param parents: If True, record knits parents, if not do not record

1496

parents.

2035

1497

"""

2036

self._add_callback = add_callback

2037

1498

self._graph_index = graph_index

2038

1499

self._deltas = deltas

1500

self._add_callback = add_callback

2039

1501

self._parents = parents

2040

1502

if deltas and not parents:

2041

# XXX: TODO: Delta tree and parent graph should be conceptually

2042

# separate.

2043

1503

raise KnitCorrupt(self, "Cannot do delta compression without "

2044

1504

"parent tracking.")

2045

self.has_graph = parents

2046

self._is_locked = is_locked

2047

2048

def __repr__(self):

2049

return "%s(%r)" % (self.__class__.__name__, self._graph_index)

2050

2051

def add_records(self, records, random_id=False):

2052

"""Add multiple records to the index.

1505

1506

def _get_entries(self, keys, check_present=False):

1507

"""Get the entries for keys.

1508

1509

:param keys: An iterable of index keys, - 1-tuples.

1510

"""

1511

keys = set(keys)

1512

found_keys = set()

1513

if self._parents:

1514

for node in self._graph_index.iter_entries(keys):

1515

yield node

1516

found_keys.add(node[1])

1517

else:

1518

# adapt parentless index to the rest of the code.

1519

for node in self._graph_index.iter_entries(keys):

1520

yield node[0], node[1], node[2], ()

1521

found_keys.add(node[1])

1522

if check_present:

1523

missing_keys = keys.difference(found_keys)

1524

if missing_keys:

1525

raise RevisionNotPresent(missing_keys.pop(), self)

1526

1527

def _present_keys(self, version_ids):

1528

return set([

1529

node[1] for node in self._get_entries(version_ids)])

1530

1531

def _parentless_ancestry(self, versions):

1532

"""Honour the get_ancestry API for parentless knit indices."""

1533

wanted_keys = self._version_ids_to_keys(versions)

1534

present_keys = self._present_keys(wanted_keys)

1535

missing = set(wanted_keys).difference(present_keys)

1536

if missing:

1537

raise RevisionNotPresent(missing.pop(), self)

1538

return list(self._keys_to_version_ids(present_keys))

1539

1540

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    Walks the parent references breadth-first from ``versions``; parents
    that have no entry in the index (ghosts) are left out of the result.

    :raises RevisionNotPresent: if one of ``versions`` itself is absent.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    ancestry = {}
    ghost_keys = set()
    versions = self._version_ids_to_keys(versions)
    todo = set(versions)
    while todo:
        # fetch everything queued by the previous round
        batch = todo
        todo = set()
        located = set()
        for (index, key, value, node_refs) in self._get_entries(batch):
            # don't ask for ghosts again - otherwise we can end up
            # looping with the queue being entirely ghosted.
            live_parents = [parent for parent in node_refs[0]
                            if parent not in ghost_keys]
            ancestry[key] = live_parents
            # queue parents we have not examined yet
            for parent in live_parents:
                if parent not in ancestry:
                    todo.add(parent)
            located.add(key)
        ghost_keys.update(batch.difference(located))
    absent = versions.difference(ancestry)
    if absent:
        raise RevisionNotPresent(absent.pop(), self)
    if topo_sorted:
        ordered = topo_sort(ancestry.items())
    else:
        ordered = ancestry.iterkeys()
    return [key[0] for key in ordered]

1578

1579

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Like get_ancestry, but referenced parents with no entry in the index
    are kept in the result as parentless nodes.

    :raises RevisionNotPresent: if one of ``versions`` itself is absent.
    """
    if not self._parents:
        return self._parentless_ancestry(versions)
    # XXX: This will do len(history) index calls - perhaps
    # it should be altered to be a index core feature?
    # get a graph of all the mentioned versions:
    ancestry = {}
    versions = self._version_ids_to_keys(versions)
    todo = set(versions)
    while todo:
        # fetch everything queued by the previous round
        batch = todo
        todo = set()
        for (index, key, value, node_refs) in self._get_entries(batch):
            ancestry[key] = node_refs[0]
            # queue parents we have not examined yet
            for parent in ancestry[key]:
                if parent not in ancestry:
                    todo.add(parent)
        unfound = batch.difference(ancestry)
        unfound_requested = versions.intersection(unfound)
        if unfound_requested:
            raise RevisionNotPresent(unfound_requested.pop(), self)
        for ghost in unfound:
            # record the ghost with no parents
            ancestry[ghost] = []
            todo.discard(ghost)  # don't look for it
    ordered = topo_sort(ancestry.items())
    return [key[0] for key in ordered]

1612

1613

def get_graph(self):
    """Return a list of (version_id, parent version_ids) for this index.

    For an index that does not track parents every version is paired
    with an empty tuple.
    """
    if self._parents:
        return [(key[0], tuple([ref[0] for ref in refs[0]]))
                for index, key, value, refs
                in self._graph_index.iter_all_entries()]
    return [(version_id, ()) for version_id in self.get_versions()]

1621

1622

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        Parents that are not present in the index are left out of each
        parents tuple. The order is undefined, allowing for different
        optimisations in the underlying implementation.
    """
    wanted = self._version_ids_to_keys(version_ids)
    if not self._parents:
        for entry in self._get_entries(wanted):
            yield entry[1][0], ()
        return
    entries = set(self._get_entries(wanted))
    referenced = set()
    known = set()
    for entry in entries:
        referenced.update(entry[3][0])
        # any node we are querying must be present
        known.add(entry[1])
    # only look up the parents whose presence is still unknown
    known.update(self._present_keys(referenced.difference(known)))
    for entry in entries:
        present = [parent[0] for parent in entry[3][0] if parent in known]
        yield entry[1][0], tuple(present)

1650

1651

def num_versions(self):
    """Return how many entries the underlying graph index holds."""
    count = 0
    for entry in self._graph_index.iter_all_entries():
        count += 1
    return count

1653

1654

__len__ = num_versions

1655

1656

def get_versions(self):
    """Get all the versions in the file. not topologically sorted."""
    version_ids = []
    for entry in self._graph_index.iter_all_entries():
        # entry[1] is the 1-tuple key; its only element is the version id
        version_ids.append(entry[1][0])
    return version_ids

1659

1660

def has_version(self, version_id):
    """True if the version is in the index."""
    wanted = self._version_ids_to_keys([version_id])
    located = self._present_keys(wanted)
    return len(located) == 1

1663

1664

def _keys_to_version_ids(self, keys):
    """Return a tuple holding the first element of each key, in order."""
    version_ids = [key[0] for key in keys]
    return tuple(version_ids)

1666

1667

def get_position(self, version_id):
    """Return details needed to access the version.

    The index value is one flag character followed by the
    space-separated position and size.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    entry = self._get_node(version_id)
    # drop the leading flag character before splitting
    fields = entry[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return entry[0], position, size

1676

1677

def get_method(self, version_id):
    """Return compression method of specified version.

    Without delta support the answer is always 'fulltext' and the index
    is not consulted.
    """
    if self._deltas:
        entry = self._get_node(version_id)
        return self._parent_compression(entry[3][1])
    return 'fulltext'

1682

1683

def _parent_compression(self, reference_list):
    """Return 'line-delta' if reference_list is non-empty, else 'fulltext'."""
    # use the second reference list to decide if this is delta'd or not.
    if len(reference_list) == 0:
        return 'fulltext'
    return 'line-delta'

1689

1690

def _get_node(self, version_id):
    """Return the index entry for version_id.

    :raises RevisionNotPresent: if the index has no such entry.
    """
    try:
        wanted = self._version_ids_to_keys([version_id])
        matches = list(self._get_entries(wanted))
        return matches[0]
    except IndexError:
        raise RevisionNotPresent(version_id, self)

1695

1696

def get_options(self, version_id):
    """Return a list of option strings for version_id.

    e.g. ['line-delta', 'no-eol']

    The first item is the compression method; 'no-eol' is appended when
    the stored index value starts with 'N'.
    """
    entry = self._get_node(version_id)
    if self._deltas:
        method = self._parent_compression(entry[3][1])
    else:
        method = 'fulltext'
    options = [method]
    if entry[2][0] == 'N':
        options.append('no-eol')
    return options

1709

1710

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    :raises errors.RevisionNotPresent: if version_id is not in the index.
    """
    found = list(self.iter_parents([version_id]))
    if found:
        # a single (version_id, parents) pair; hand back the parents
        return found[0][1]
    # missing key
    raise errors.RevisionNotPresent(version_id, self)

1717

1718

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    The entry lookup is made with check_present=True, so an absent
    version_id is reported by _get_entries. An index that does not track
    parents always answers with an empty tuple.
    """
    wanted = self._version_ids_to_keys([version_id])
    entries = list(self._get_entries(wanted, check_present=True))
    if self._parents:
        return self._keys_to_version_ids(entries[0][3][0])
    return ()

1725

1726

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: naming one of the absent versions.
    """
    wanted = self._version_ids_to_keys(version_ids)
    absent = wanted.difference(self._present_keys(wanted))
    if not absent:
        return
    raise RevisionNotPresent(absent.pop(), self)

1733

1734

def add_version(self, version_id, options, access_memo, parents):
    """Add a version record to the index.

    Delegates to add_versions with a one-element tuple of records.
    """
    record = (version_id, options, access_memo, parents)
    return self.add_versions((record,))

1737

1738

def add_versions(self, versions, random_id=False):

1739

"""Add multiple versions to the index.

2053

1740

2054

1741

This function does not insert data into the Immutable GraphIndex

2055

1742

backing the KnitGraphIndex, instead it prepares data for insertion by

2056

1743

the caller and checks that it is safe to insert then calls

2057

1744

self._add_callback with the prepared GraphIndex nodes.

2058

1745

2059

:param records: a list of tuples:

2060

(key, options, access_memo, parents).

1746

:param versions: a list of tuples:

1747

(version_id, options, pos, size, parents).

2061

1748

:param random_id: If True the ids being added were randomly generated

2062

1749

and no check for existence will be performed.

2063

1750

"""

2065

1752

raise errors.ReadOnlyError(self)

2066

1753

# we hope there are no repositories with inconsistent parentage

2067

1754

# anymore.

1755

# check for dups

2068

1756

2069

1757

keys = {}

2070

for (key, options, access_memo, parents) in records:

2071

if self._parents:

2072

parents = tuple(parents)

1758

for (version_id, options, access_memo, parents) in versions:

2073

1759

index, pos, size = access_memo

1760

key = (version_id, )

1761

parents = tuple((parent, ) for parent in parents)

2074

1762

if 'no-eol' in options:

2075

1763

value = 'N'

2076

1764

else:

2093

1781

"in parentless index.")

2094

1782

node_refs = ()

2095

1783

keys[key] = (value, node_refs)

2096

# check for dups

2097

1784

if not random_id:

2098

1785

present_nodes = self._get_entries(keys)

2099

1786

for (index, key, value, node_refs) in present_nodes:

2100

if (value[0] != keys[key][0][0] or

2101

node_refs != keys[key][1]):

2102

raise KnitCorrupt(self, "inconsistent details in add_records"

1787

if (value, node_refs) != keys[key]:

1788

raise KnitCorrupt(self, "inconsistent details in add_versions"

2103

1789

": %s %s" % ((value, node_refs), keys[key]))

2104

1790

del keys[key]

2105

1791

result = []

2111

1797

result.append((key, value))

2112

1798

self._add_callback(result)

2113

1799

2114

def _check_read(self):
    """Raise errors.ObjectNotLocked unless the is_locked callback is true."""
    if self._is_locked():
        return
    raise errors.ObjectNotLocked(self)

2118

2119

def _check_write_ok(self):
    """Raise errors.ObjectNotLocked unless the is_locked callback is true."""
    locked = self._is_locked()
    if not locked:
        raise errors.ObjectNotLocked(self)

2123

2124

def _compression_parent(self, an_entry):
    """Return the key that an_entry is compressed against, or None.

    :param an_entry: An index entry whose fourth field holds the
        reference lists; the second list names the compression parent.
    :raises AssertionError: if more than one compression parent is
        recorded.
    """
    # Grab the second parent list (as deltas implies parents currently)
    compression_parents = an_entry[3][1]
    if not compression_parents:
        return None
    if len(compression_parents) != 1:
        # Wrap in a 1-tuple: formatting a multi-element tuple directly
        # would raise TypeError instead of the intended AssertionError.
        raise AssertionError(
            "Too many compression parents: %r" % (compression_parents,))
    return compression_parents[0]

2134

2135

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    self._check_read()
    details = {}
    for entry in self._get_entries(keys, False):
        if self._parents:
            parents = entry[3][0]
        else:
            parents = ()
        if self._deltas:
            basis_key = self._compression_parent(entry)
        else:
            basis_key = None
        # a leading 'N' in the index value marks a text without final EOL
        noeol = (entry[2][0] == 'N')
        if basis_key:
            method = 'line-delta'
        else:
            method = 'fulltext'
        index_memo = self._node_to_position(entry)
        details[entry[1]] = (index_memo, basis_key, parents, (method, noeol))
    return details

2176

2177

def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    Every yielded entry has four fields; an index without parent
    tracking gets an empty tuple as the fourth.

    :param keys: An iterable of index key tuples.
    :param check_present: When True, RevisionNotPresent is raised after
        the last entry if some requested key was never yielded.
    """
    requested = set(keys)
    yielded = set()
    nodes = self._graph_index.iter_entries(requested)
    if self._parents:
        for node in nodes:
            yield node
            yielded.add(node[1])
    else:
        # adapt parentless index to the rest of the code.
        for node in nodes:
            yield node[0], node[1], node[2], ()
            yielded.add(node[1])
    if check_present:
        not_found = requested.difference(yielded)
        if not_found:
            raise RevisionNotPresent(not_found.pop(), self)

2197

2198

def get_method(self, key):
    """Return compression method of specified key."""
    node = self._get_node(key)
    return self._get_method(node)

2201

2202

def _get_method(self, node):
    """Return 'line-delta' if node has a compression parent, else 'fulltext'.

    An index without delta support always answers 'fulltext'.
    """
    if self._deltas and self._compression_parent(node):
        return 'line-delta'
    return 'fulltext'

2209

2210

def _get_node(self, key):
    """Return the index entry for key.

    :raises RevisionNotPresent: if the index has no entry for key.
    """
    try:
        matches = list(self._get_entries([key]))
        return matches[0]
    except IndexError:
        raise RevisionNotPresent(key, self)

2215

2216

def get_options(self, key):
    """Return a list representing options.

    e.g. ['foo', 'bar']

    The compression method comes first; 'no-eol' follows when the stored
    index value starts with 'N'.
    """
    entry = self._get_node(key)
    method = self._get_method(entry)
    if entry[2][0] == 'N':
        return [method, 'no-eol']
    return [method]

2226

2227

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping. When the index does not track parents, every
        present key maps to None.
    """
    self._check_read()
    entries = self._get_entries(keys)
    parent_map = {}
    tracks_parents = self._parents
    for entry in entries:
        if tracks_parents:
            parent_map[entry[1]] = entry[3][0]
        else:
            parent_map[entry[1]] = None
    return parent_map

2244

2245

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))

2253

2254

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.
    """
    self._check_read()
    all_keys = []
    for entry in self._graph_index.iter_all_entries():
        all_keys.append(entry[1])
    return all_keys

2261

2262

def _node_to_position(self, node):
    """Convert an index value to position details.

    :return: (node[0], position, size); the two integers are parsed from
        the space-separated value once its first character is dropped.
    """
    fields = node[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

2266

2267

2268

class _KnitKeyAccess(object):

2269

"""Access to records in .knit files."""

2270

2271

def __init__(self, transport, mapper):

2272

"""Create a _KnitKeyAccess with transport and mapper.

2273

2274

:param transport: The transport the access object is rooted at.

2275

:param mapper: The mapper used to map keys to .knit files.

1800

def _version_ids_to_keys(self, version_ids):
    """Return the set of 1-tuple index keys for version_ids."""
    keys = set()
    for version_id in version_ids:
        keys.add((version_id, ))
    return keys

1802

1803

1804

class _KnitAccess(object):

1805

"""Access to knit records in a .knit file."""

1806

1807

def __init__(self, transport, filename, _file_mode, _dir_mode,

1808

_need_to_create, _create_parent_dir):

1809

"""Create a _KnitAccess for accessing and inserting data.

1810

1811

:param transport: The transport the .knit is located on.

1812

:param filename: The filename of the .knit.

2276

1813

"""

2277

1814

self._transport = transport

2278

self._mapper = mapper

1815

self._filename = filename

1816

self._file_mode = _file_mode

1817

self._dir_mode = _dir_mode

1818

self._need_to_create = _need_to_create

1819

self._create_parent_dir = _create_parent_dir

2279

1820

2280

def add_raw_records(self, key_sizes, raw_data):

1821

def add_raw_records(self, sizes, raw_data):

2281

1822

"""Add raw knit bytes to a storage area.

2282

1823

2283

The data is spooled to the container writer in one bytes-record per

2284

raw data item.

1824

The data is spooled to whereever the access method is storing data.

2285

1825

2286

:param sizes: An iterable of tuples containing the key and size of each

2287

raw data segment.

1826

:param sizes: An iterable containing the size of each raw data segment.

2288

1827

:param raw_data: A bytestring containing the data.

2289

:return: A list of memos to retrieve the record later. Each memo is an

2290

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

2291

length), where the key is the record key.

1828

:return: A list of memos to retrieve the record later. Each memo is a

1829

tuple - (index, pos, length), where the index field is always None

1830

for the .knit access method.

2292

1831

"""

2293

if type(raw_data) != str:

2294

raise AssertionError(

2295

'data must be plain bytes was %s' % type(raw_data))

1832

assert type(raw_data) == str, \

1833

'data must be plain bytes was %s' % type(raw_data)

1834

if not self._need_to_create:

1835

base = self._transport.append_bytes(self._filename, raw_data)

1836

else:

1837

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1838

create_parent_dir=self._create_parent_dir,

1839

mode=self._file_mode,

1840

dir_mode=self._dir_mode)

1841

self._need_to_create = False

1842

base = 0

2296

1843

result = []

2297

offset = 0

2298

# TODO: This can be tuned for writing to sftp and other servers where

2299

# append() is relatively expensive by grouping the writes to each key

2300

# prefix.

2301

for key, size in key_sizes:

2302

path = self._mapper.map(key)

2303

try:

2304

base = self._transport.append_bytes(path + '.knit',

2305

raw_data[offset:offset+size])

2306

except errors.NoSuchFile:

2307

self._transport.mkdir(osutils.dirname(path))

2308

base = self._transport.append_bytes(path + '.knit',

2309

raw_data[offset:offset+size])

2310

# if base == 0:

2311

# chmod.

2312

offset += size

2313

result.append((key, base, size))

1844

for size in sizes:

1845

result.append((None, base, size))

1846

base += size

2314

1847

return result

2315

1848

1849

def create(self):
    """IFF this data access has its own storage area, initialise it.

    Writes an empty file at the knit's filename using the configured
    file mode.

    :return: None.
    """
    transport = self._transport
    transport.put_bytes_non_atomic(self._filename, '', mode=self._file_mode)

1856

1857

def open_file(self):
    """IFF this data access can be represented as a single file, open it.

    For knits that are not mapped to a single file on disk this will
    always return None.

    :return: None or a file handle. None is also returned when the
        transport reports NoSuchFile for the knit's filename.
    """
    try:
        handle = self._transport.get(self._filename)
    except NoSuchFile:
        return None
    return handle

1870

2316

1871

def get_raw_records(self, memos_for_retrieval):

2317

1872

"""Get the raw bytes for a records.

2318

1873

2319

:param memos_for_retrieval: An iterable containing the access memo for

2320

retrieving the bytes.

1874

:param memos_for_retrieval: An iterable containing the (index, pos,

1875

length) memo for retrieving the bytes. The .knit method ignores

1876

the index as there is always only a single file.

2321

1877

:return: An iterator over the bytes of the records.

2322

1878

"""

2323

# first pass, group into same-index request to minimise readv's issued.

2324

request_lists = []

2325

current_prefix = None

2326

for (key, offset, length) in memos_for_retrieval:

2327

if current_prefix == key[:-1]:

2328

current_list.append((offset, length))

2329

else:

2330

if current_prefix is not None:

2331

request_lists.append((current_prefix, current_list))

2332

current_prefix = key[:-1]

2333

current_list = [(offset, length)]

2334

# handle the last entry

2335

if current_prefix is not None:

2336

request_lists.append((current_prefix, current_list))

2337

for prefix, read_vector in request_lists:

2338

path = self._mapper.map(prefix) + '.knit'

2339

for pos, data in self._transport.readv(path, read_vector):

2340

yield data

2341

2342

2343

class _DirectPackAccess(object):

2344

"""Access to data in one or more packs with less translation."""

2345

2346

def __init__(self, index_to_packs):

2347

"""Create a _DirectPackAccess object.

1879

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1880

for pos, data in self._transport.readv(self._filename, read_vector):

1881

yield data

1882

1883

1884

class _PackAccess(object):

1885

"""Access to knit records via a collection of packs."""

1886

1887

def __init__(self, index_to_packs, writer=None):

1888

"""Create a _PackAccess object.

2348

1889

2349

1890

:param index_to_packs: A dict mapping index objects to the transport

2350

1891

and file names for obtaining data.

1892

:param writer: A tuple (pack.ContainerWriter, write_index) which

1893

contains the pack to write, and the index that reads from it will

1894

be associated with.

2351

1895

"""

2352

self._container_writer = None

2353

self._write_index = None

2354

self._indices = index_to_packs

1896

if writer:

1897

self.container_writer = writer[0]

1898

self.write_index = writer[1]

1899

else:

1900

self.container_writer = None

1901

self.write_index = None

1902

self.indices = index_to_packs

2355

1903

2356

def add_raw_records(self, key_sizes, raw_data):

1904

def add_raw_records(self, sizes, raw_data):

2357

1905

"""Add raw knit bytes to a storage area.

2358

1906

2359

1907

The data is spooled to the container writer in one bytes-record per

2360

1908

raw data item.

2361

1909

2362

:param sizes: An iterable of tuples containing the key and size of each

2363

raw data segment.

1910

:param sizes: An iterable containing the size of each raw data segment.

2364

1911

:param raw_data: A bytestring containing the data.

2365

:return: A list of memos to retrieve the record later. Each memo is an

2366

opaque index memo. For _DirectPackAccess the memo is (index, pos,

2367

length), where the index field is the write_index object supplied

2368

to the PackAccess object.

1912

:return: A list of memos to retrieve the record later. Each memo is a

1913

tuple - (index, pos, length), where the index field is the

1914

write_index object supplied to the PackAccess object.

2369

1915

"""

2370

if type(raw_data) != str:

2371

raise AssertionError(

2372

'data must be plain bytes was %s' % type(raw_data))

1916

assert type(raw_data) == str, \

1917

'data must be plain bytes was %s' % type(raw_data)

2373

1918

result = []

2374

1919

offset = 0

2375

for key, size in key_sizes:

2376

p_offset, p_length = self._container_writer.add_bytes_record(

1920

for size in sizes:

1921

p_offset, p_length = self.container_writer.add_bytes_record(

2377

1922

raw_data[offset:offset+size], [])

2378

1923

offset += size

2379

result.append((self._write_index, p_offset, p_length))

1924

result.append((self.write_index, p_offset, p_length))

2380

1925

return result

2381

1926

1927

def create(self):
    """Do nothing: pack based knits do not get individually created."""
    return None

1929

2382

1930

def get_raw_records(self, memos_for_retrieval):

2383

1931

"""Get the raw bytes for a records.

2384

1932

2403

1951

if current_index is not None:

2404

1952

request_lists.append((current_index, current_list))

2405

1953

for index, offsets in request_lists:

2406

transport, path = self._indices[index]

1954

transport, path = self.indices[index]

2407

1955

reader = pack.make_readv_reader(transport, path, offsets)

2408

1956

for names, read_func in reader.iter_records():

2409

1957

yield read_func(None)

2410

1958

2411

def set_writer(self, writer, index, transport_packname):

1959

def open_file(self):
    """Return None: pack based knits have no single file."""
    handle = None
    return handle

1962

1963

def set_writer(self, writer, index, (transport, packname)):

2412

1964

"""Set a writer to use for adding data."""

2413

if index is not None:

2414

self._indices[index] = transport_packname

2415

self._container_writer = writer

2416

self._write_index = index

1965

self.indices[index] = (transport, packname)

1966

self.container_writer = writer

1967

self.write_index = index

1968

1969

1970

class _KnitData(object):

1971

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1972

1973

The KnitData class provides the logic for parsing and using knit records,

1974

making use of an access method for the low level read and write operations.

1975

"""

1976

1977

def __init__(self, access):

1978

"""Create a KnitData object.

1979

1980

:param access: The access method to use. Access methods such as

1981

_KnitAccess manage the insertion of raw records and the subsequent

1982

retrieval of the same.

1983

"""

1984

self._access = access

1985

self._checked = False

1986

# TODO: jam 20060713 conceptually, this could spill to disk

1987

# if the cached size gets larger than a certain amount

1988

# but it complicates the model a bit, so for now just use

1989

# a simple dictionary

1990

self._cache = {}

1991

self._do_cache = False

1992

1993

def enable_cache(self):

1994

"""Enable caching of reads."""

1995

self._do_cache = True

1996

1997

def clear_cache(self):

1998

"""Clear the record cache."""

1999

self._do_cache = False

2000

self._cache = {}

2001

2002

def _open_file(self):

2003

return self._access.open_file()

2004

2005

def _record_to_data(self, version_id, digest, lines, dense_lines=None):

2006

"""Convert version_id, digest, lines into a raw data block.

2007

2008

:param dense_lines: The bytes of lines but in a denser form. For

2009

instance, if lines is a list of 1000 bytestrings each ending in \n,

2010

dense_lines may be a list with one line in it, containing all the

2011

1000's lines and their \n's. Using dense_lines if it is already

2012

known is a win because the string join to create bytes in this

2013

function spends less time resizing the final string.

2014

:return: (len, a StringIO instance with the raw data ready to read.)

2015

"""

2016

# Note: using a string copy here increases memory pressure with e.g.

2017

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

2018

# when doing the initial commit of a mozilla tree. RBC 20070921

2019

bytes = ''.join(chain(

2020

["version %s %d %s\n" % (version_id,

2021

len(lines),

2022

digest)],

2023

dense_lines or lines,

2024

["end %s\n" % version_id]))

2025

assert bytes.__class__ == str

2026

compressed_bytes = bytes_to_gzip(bytes)

2027

return len(compressed_bytes), compressed_bytes

2028

2029

def add_raw_records(self, sizes, raw_data):

2030

"""Append a prepared record to the data file.

2031

2032

:param sizes: An iterable containing the size of each raw data segment.

2033

:param raw_data: A bytestring containing the data.

2034

:return: a list of index data for the way the data was stored.

2035

See the access method add_raw_records documentation for more

2036

details.

2037

"""

2038

return self._access.add_raw_records(sizes, raw_data)

2039

2040

def _parse_record_header(self, version_id, raw_data):

2041

"""Parse a record header for consistency.

2042

2043

:return: the header and the decompressor stream.

2044

as (stream, header_record)

2045

"""

2046

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2047

try:

2048

rec = self._check_header(version_id, df.readline())

2049

except Exception, e:

2050

raise KnitCorrupt(self._access,

2051

"While reading {%s} got %s(%s)"

2052

% (version_id, e.__class__.__name__, str(e)))

2053

return df, rec

2054

2055

def _check_header(self, version_id, line):

2056

rec = line.split()

2057

if len(rec) != 4:

2058

raise KnitCorrupt(self._access,

2059

'unexpected number of elements in record header')

2060

if rec[1] != version_id:

2061

raise KnitCorrupt(self._access,

2062

'unexpected version, wanted %r, got %r'

2063

% (version_id, rec[1]))

2064

return rec

2065

2066

def _parse_record(self, version_id, data):

2067

# profiling notes:

2068

# 4168 calls in 2880 217 internal

2069

# 4168 calls to _parse_record_header in 2121

2070

# 4168 calls to readlines in 330

2071

df = GzipFile(mode='rb', fileobj=StringIO(data))

2072

2073

try:

2074

record_contents = df.readlines()

2075

except Exception, e:

2076

raise KnitCorrupt(self._access,

2077

"While reading {%s} got %s(%s)"

2078

% (version_id, e.__class__.__name__, str(e)))

2079

header = record_contents.pop(0)

2080

rec = self._check_header(version_id, header)

2081

2082

last_line = record_contents.pop()

2083

if len(record_contents) != int(rec[2]):

2084

raise KnitCorrupt(self._access,

2085

'incorrect number of lines %s != %s'

2086

' for version {%s}'

2087

% (len(record_contents), int(rec[2]),

2088

version_id))

2089

if last_line != 'end %s\n' % rec[1]:

2090

raise KnitCorrupt(self._access,

2091

'unexpected version end line %r, wanted %r'

2092

% (last_line, version_id))

2093

df.close()

2094

return record_contents, rec[3]

2095

2096

def read_records_iter_raw(self, records):

2097

"""Read text records from data file and yield raw data.

2098

2099

This unpacks enough of the text record to validate the id is

2100

as expected but thats all.

2101

"""

2102

# setup an iterator of the external records:

2103

# uses readv so nice and fast we hope.

2104

if len(records):

2105

# grab the disk data needed.

2106

if self._cache:

2107

# Don't check _cache if it is empty

2108

needed_offsets = [index_memo for version_id, index_memo

2109

in records

2110

if version_id not in self._cache]

2111

else:

2112

needed_offsets = [index_memo for version_id, index_memo

2113

in records]

2114

2115

raw_records = self._access.get_raw_records(needed_offsets)

2116

2117

for version_id, index_memo in records:

2118

if version_id in self._cache:

2119

# This data has already been validated

2120

data = self._cache[version_id]

2121

else:

2122

data = raw_records.next()

2123

if self._do_cache:

2124

self._cache[version_id] = data

2125

2126

# validate the header

2127

df, rec = self._parse_record_header(version_id, data)

2128

df.close()

2129

yield version_id, data

2130

2131

def read_records_iter(self, records):

2132

"""Read text records from data file and yield result.

2133

2134

The result will be returned in whatever is the fastest to read.

2135

Not by the order requested. Also, multiple requests for the same

2136

record will only yield 1 response.

2137

:param records: A list of (version_id, pos, len) entries

2138

:return: Yields (version_id, contents, digest) in the order

2139

read, not the order requested

2140

"""

2141

if not records:

2142

return

2143

2144

if self._cache:

2145

# Skip records we have alread seen

2146

yielded_records = set()

2147

needed_records = set()

2148

for record in records:

2149

if record[0] in self._cache:

2150

if record[0] in yielded_records:

2151

continue

2152

yielded_records.add(record[0])

2153

data = self._cache[record[0]]

2154

content, digest = self._parse_record(record[0], data)

2155

yield (record[0], content, digest)

2156

else:

2157

needed_records.add(record)

2158

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2159

else:

2160

needed_records = sorted(set(records), key=operator.itemgetter(1))

2161

2162

if not needed_records:

2163

return

2164

2165

# The transport optimizes the fetching as well

2166

# (ie, reads continuous ranges.)

2167

raw_data = self._access.get_raw_records(

2168

[index_memo for version_id, index_memo in needed_records])

2169

2170

for (version_id, index_memo), data in \

2171

izip(iter(needed_records), raw_data):

2172

content, digest = self._parse_record(version_id, data)

2173

if self._do_cache:

2174

self._cache[version_id] = data

2175

yield version_id, content, digest

2176

2177

def read_records(self, records):
    """Read records into a dictionary.

    :param records: Record descriptors, handed unchanged to
        read_records_iter.
    :return: A dict mapping each record id to its (content, digest) pair.
    """
    return dict((record_id, (content, digest))
                for record_id, content, digest
                in self.read_records_iter(records))

2184

2185

2186

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.

        :return: True only when both source and target are
            KnitVersionedFile instances; False otherwise, including when
            the check raises AttributeError.
        """
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records the target lacks from the source in one
        batch, stripping annotations on the way when the source is
        annotated and the target is not.

        :return: The number of versions copied (0 when nothing is needed),
            or whatever the generic join returns for plain -> annotated.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        # If the source and target are mismatched w.r.t. annotations vs
        # plain, the data needs to be converted accordingly.
        source_annotated = self.source.factory.annotated
        if source_annotated != self.target.factory.annotated:
            if not source_annotated:
                # Plain -> annotated requires building the annotations
                # from scratch. The generic join code handles this
                # implicitly so we delegate to it.
                return super(InterKnit, self).join(pb, msg, version_ids,
                                                   ignore_missing)
            converter = self._anno_to_plain_converter
        else:
            converter = None

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # Note: the pb argument is not used; a nested bar is always created.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            # XXX: For efficiency we should not look at the whole index,
            #      we only need to consider the referenced revisions - they
            #      must all be present, or the method must be full-text.
            #      TODO, RBC 20070919
            needed_versions = self.source_ancestry - this_versions
            if not needed_versions:
                return 0

            full_list = topo_sort(self.source.get_graph())
            version_list = [
                candidate for candidate in full_list
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions)]

            # Plan the join: gather index data for each version, in
            # topological order, checking that the copy stays consistent.
            source_index = self.source._index
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = source_index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # Pull the raw data across, pairing every raw record with the
            # plan entry made for it above.
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for raw_entry, planned in izip(raw_stream, copy_queue):
                version_id, raw_data = raw_entry
                version_id2, options, parents = planned
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                if converter:
                    size, raw_data = converter(raw_data, version_id, options,
                                               parents)
                else:
                    size = len(raw_data)
                raw_records.append((version_id, options, parents, size))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))
            return count
        finally:
            pb.finished()

    def _anno_to_plain_converter(self, raw_data, version_id, options,
                                 parents):
        """Convert annotated content to plain content.

        :return: Whatever the target's _record_to_data returns for the
            re-serialised lines (join unpacks it as (size, raw_data)).
        """
        data, digest = self.source._data._parse_record(version_id, raw_data)
        source_factory = self.source.factory
        target_factory = self.target.factory
        if 'fulltext' not in options:
            delta = source_factory.parse_line_delta(data, version_id,
                                                    plain=True)
            lines = target_factory.lower_line_delta(delta)
        else:
            content = source_factory.parse_fulltext(data, version_id)
            lines = target_factory.lower_fulltext(content)
        return self.target._data._record_to_data(version_id, digest, lines)


InterVersionedFile.register_optimiser(InterKnit)

2304

2305

2306

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :return: True only when source is a bzrlib.weave.Weave and target
            is a KnitVersionedFile; False otherwise, including when the
            check raises AttributeError.
        """
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds every needed version to the target with add_lines, in
        topological order of the source graph.

        :return: The number of versions added (0 when nothing is needed).
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # Note: the pb argument is not used; a nested bar is always created.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            if not needed_versions:
                return 0

            full_list = topo_sort(self.source.get_graph())
            version_list = [
                candidate for candidate in full_list
                if (not self.target.has_version(candidate)
                    and candidate in needed_versions)]

            # Do the join, one version at a time.
            total = len(version_list)
            count = 0
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # Check that it will be a consistent copy: every parent
                # must already be in the target.
                for parent in parents:
                    assert (self.target.has_version(parent))
                lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, lines)
                count += 1
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)

2417

2365

2418

2366

2419

2367

# Deprecated, use PatienceSequenceMatcher instead

2427

2375

It will work for knits with cached annotations, but this is not

2428

2376

recommended.

2429

2377

"""

2430

annotator = _KnitAnnotator(knit)

2431

return iter(annotator.annotate(revision_id))

2432

2433

2434

class _KnitAnnotator(object):

2435

"""Build up the annotations for a text."""

2436

2437

def __init__(self, knit):
    """Create an annotator with empty caches for the given knit.

    :param knit: The knit whose texts will be annotated.
    """
    self._knit = knit

    # Tuning: how many generations may pass before a merge node's
    # annotations are kept, and which nodes have been chosen so far.
    self._generations_until_keep = 100
    self._nodes_to_keep_annotations = set()

    # Built lazily by _get_heads_provider.
    self._heads_provider = None

    # Content objects, differs from fulltexts because of how final newlines
    # are treated by knits. the content objects here will always have a
    # final newline
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Raw data for nodes that could not be processed yet: maps the
    # revision_id of a base to the list of children that will be
    # annotated from it.
    self._pending_children = {}

    # Nodes which cannot be extracted
    self._ghosts = set()

    # Children still depending on each node, so we know whether the node
    # needs to be kept around.
    self._annotate_children = {}
    self._compression_children = {}

    self._all_build_details = {}

    # The children => parent revision_id graph
    self._revision_id_graph = {}

2469

2470

def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new generation threshold.
    """
    setattr(self, '_generations_until_keep', value)

2477

2478

def _add_fulltext_content(self, revision_id, content_obj):
    """Remember the content object for revision_id and return its text.

    :param revision_id: Key under which the content object is stored.
    :param content_obj: Content object; only its text() method is used.
    :return: The result of content_obj.text().
    """
    contents = self._fulltext_contents
    contents[revision_id] = content_obj
    # TODO: jam 20080305 It might be good to check the sha1digest here
    lines = content_obj.text()
    return lines

2482

2483

def _check_parents(self, child, nodes_to_annotate):

2484

"""Check if all parents have been processed.

2485

2486

:param child: A tuple of (rev_id, parents, raw_content)

2487

:param nodes_to_annotate: If child is ready, add it to

2488

nodes_to_annotate, otherwise put it back in self._pending_children

2489

"""

2490

for parent_id in child[1]:

2491

if (parent_id not in self._annotated_lines):

2492

# This parent is present, but another parent is missing

2493

self._pending_children.setdefault(parent_id,

2494

[]).append(child)

2495

break

2378

ancestry = knit.get_ancestry(revision_id)

2379

fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))

2380

annotations = {}

2381

for candidate in ancestry:

2382

if candidate in annotations:

2383

continue

2384

parents = knit.get_parents(candidate)

2385

if len(parents) == 0:

2386

blocks = None

2387

elif knit._index.get_method(candidate) != 'line-delta':

2388

blocks = None

2496

2389

else:

2497

# This one is ready to be processed

2498

nodes_to_annotate.append(child)

2499

2500

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of revision_id, as passed to reannotate.
    :param parent_ids: Parents whose annotations are looked up in
        self._annotated_lines.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    known = self._annotated_lines
    parent_annotations = [known[parent] for parent in parent_ids]
    self._annotated_lines[revision_id] = list(
        annotate.reannotate(parent_annotations, fulltext, revision_id,
                            left_matching_blocks,
                            heads_provider=self._get_heads_provider()))

    # This revision no longer needs its parents; drop cached data for any
    # parent that has no children left waiting on it, unless the parent
    # was marked to be kept.
    for parent in parent_ids:
        waiting = self._annotate_children[parent]
        waiting.remove(revision_id)
        if waiting or parent in self._nodes_to_keep_annotations:
            continue
        del self._annotated_lines[parent]
        del self._all_build_details[parent]
        self._fulltext_contents.pop(parent, None)

    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    ready = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, ready)
    return ready

2528

2529

def _get_build_graph(self, key):
    """Get the graphs for building texts and annotations.

    The data you need for creating a full text may be different than the
    data you need to annotate that text. (At a minimum, you need both
    parents to create an annotation, but only need 1 parent to generate the
    fulltext.)

    :param key: The key whose ancestry should be walked.
    :return: A list of (key, index_memo) records, suitable for
        passing to read_records_iter to start reading in the raw data from
        the pack file.
    :raises KnitCorrupt: If a ghost is the compression parent of a node.
    """
    if key in self._annotated_lines:
        # Nothing to do
        return []

    records = []
    generation = 0
    kept_generation = 0
    pending = set([key])
    while pending:
        # Look up every pending node in one go.
        generation += 1
        this_iteration = pending
        build_details = self._knit._index.get_build_details(this_iteration)
        self._all_build_details.update(build_details)
        # new_nodes = self._knit._index._get_entries(this_iteration)
        pending = set()
        for node, details in build_details.iteritems():
            index_memo, compression_parent, parents, record_details = details
            self._revision_id_graph[node] = parents
            records.append((node, index_memo))
            for parent in parents:
                # Do we actually need to check _annotated_lines?
                if parent not in self._all_build_details:
                    pending.add(parent)
                self._annotate_children.setdefault(parent, []).append(node)
            if compression_parent:
                self._compression_children.setdefault(
                    compression_parent, []).append(node)
            num_gens = generation - kept_generation
            if (num_gens >= self._generations_until_keep
                    and len(parents) > 1):
                kept_generation = generation
                self._nodes_to_keep_annotations.add(node)

        # Anything we asked about but got no details for is a ghost.
        missing_versions = this_iteration.difference(build_details)
        self._ghosts.update(missing_versions)
        for missing_version in missing_versions:
            # add a key, no parents
            self._revision_id_graph[missing_version] = ()
            pending.discard(missing_version)  # don't look for it

    if self._ghosts.intersection(self._compression_children):
        raise KnitCorrupt(
            "We cannot have nodes which have a ghost compression parent:\n"
            "ghosts: %r\n"
            "compression children: %r"
            % (self._ghosts, self._compression_children))

    # Cleanout anything that depends on a ghost so that we don't wait for
    # the ghost to show up; we won't be building these nodes.
    for ghost in self._ghosts:
        self._annotate_children.pop(ghost, None)

    # Generally we will want to read the records in reverse order, because
    # we find the parent nodes after the children
    records.reverse()
    return records

2599

2600

def _annotate_records(self, records):
    """Build the annotations for the listed records.

    :param records: Records handed to the knit's _read_records_iter.
    """
    # We iterate in the order read, rather than a strict order requested
    # However, process what we can, and put off to the side things that
    # still need parents, cleaning them up when those parents are
    # processed.
    factory = self._knit._factory
    for rev_id, record, digest in self._knit._read_records_iter(records):
        if rev_id in self._annotated_lines:
            continue
        parent_ids = [parent for parent in self._revision_id_graph[rev_id]
                      if parent not in self._ghosts]
        (index_memo, compression_parent, parents,
         record_details) = self._all_build_details[rev_id]
        nodes_to_annotate = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids, we should be able to do this without assuming
        #       the build order
        if parent_ids:
            # Check if all the parents are present
            self._check_parents((rev_id, parent_ids, record),
                                nodes_to_annotate)
        else:
            # There are no parents for this node, so just add it
            # TODO: This probably needs to be decoupled
            fulltext_content, delta = factory.parse_record(
                rev_id, record, record_details, None)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content)
            nodes_to_annotate.extend(self._add_annotation(
                rev_id, fulltext, parent_ids, left_matching_blocks=None))

        while nodes_to_annotate:
            # Should we use a queue here instead of a stack?
            rev_id, parent_ids, record = nodes_to_annotate.pop()
            (index_memo, compression_parent, parents,
             record_details) = self._all_build_details[rev_id]
            if compression_parent is None:
                fulltext_content = factory.parse_fulltext(record, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
                blocks = None
            else:
                comp_children = self._compression_children[compression_parent]
                if rev_id not in comp_children:
                    raise AssertionError("%r not in compression children %r"
                                         % (rev_id, comp_children))
                # If there is only 1 child, it is safe to reuse this
                # content
                reuse_content = (
                    len(comp_children) == 1
                    and compression_parent
                    not in self._nodes_to_keep_annotations)
                if reuse_content:
                    # Remove it from the cache since it will be changing
                    parent_fulltext_content = self._fulltext_contents.pop(
                        compression_parent)
                    # Make sure to copy the fulltext since it might be
                    # modified
                    parent_fulltext = list(parent_fulltext_content.text())
                else:
                    parent_fulltext_content = self._fulltext_contents[
                        compression_parent]
                    parent_fulltext = parent_fulltext_content.text()
                comp_children.remove(rev_id)
                fulltext_content, delta = factory.parse_record(
                    rev_id, record, record_details,
                    parent_fulltext_content,
                    copy_base_content=(not reuse_content))
                fulltext = self._add_fulltext_content(rev_id,
                                                      fulltext_content)
                blocks = KnitContent.get_line_delta_blocks(
                    delta, parent_fulltext, fulltext)
            nodes_to_annotate.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))

2673

2674

def _get_heads_provider(self):
    """Create a heads provider for resolving ancestry issues.

    The provider is built on first use from self._revision_id_graph and
    the same object is returned on every later call.
    """
    if self._heads_provider is None:
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        self._heads_provider = _mod_graph.FrozenHeadsCache(graph_obj)
    return self._heads_provider

2684

2685

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    The fast path below is currently switched off unconditionally, so
    every call is answered by _simple_annotate.

    :param key: The key to annotate.
    :return: The annotated lines for key.
    :raises errors.RevisionNotPresent: On the (disabled) fast path, when
        key turns out to be a ghost.
    """
    fast_path_disabled = True
    if fast_path_disabled or len(self._knit._fallback_vfs) > 0:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    build_records = self._get_build_graph(key)
    if key in self._ghosts:
        raise errors.RevisionNotPresent(key, self._knit)
    self._annotate_records(build_records)
    return self._annotated_lines[key]

2698

2699

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The annotated lines for key, as a list built from
        annotate.reannotate.
    :raises errors.RevisionNotPresent: If no annotation was produced for
        key (for example because the record stream did not contain it).
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    # Collect every present key reachable from key; ghosts are ignored.
    keys = set()
    while True:
        try:
            present, ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    # Records are requested in topological order, so each record's parents
    # are expected to be in parent_cache by the time it is reached.
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name: rebinding ``key`` here would make the final
        # lookup return whichever record the stream happened to yield last.
        record_key = record.key
        fulltext = split_lines(record.get_bytes_as('fulltext'))
        parents = parent_map[record_key]
        if parents is not None:
            parent_lines = [parent_cache[parent] for parent in parents]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

2390

parent, sha1, noeol, delta = knit.get_delta(candidate)

2391

blocks = KnitContent.get_line_delta_blocks(delta,

2392

fulltext[parents[0]], fulltext[candidate])

2393

annotations[candidate] = list(annotate.reannotate([annotations[p]

2394

for p in parents], fulltext[candidate], candidate, blocks))

2395

return iter(annotations[revision_id])

2735

2396

2736

2397

2737

2398

try:

Older »