~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2011-05-04 12:10:51 UTC
mfrom: (5819.1.4 777007-developer-doc)
Revision ID: pqm@pqm.ubuntu.com-20110504121051-aovlsmqiivjmc4fc

(jelmer) Small fixes to developer documentation. (Jonathan Riddell)

files added:
.testr.conf

MANIFEST.in

NEWS

README_BDIST_RPM

apport

apport/README

apport/bzr-crashdb.conf

apport/source_bzr.py

bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_export_c_api.h

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_import_c_api.h

bzrlib/_knit_load_data_py.py

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_simple_set_pyx.pxd

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/_static_tuple_py.py

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v4.py

bzrlib/cethread.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cleanup.py

bzrlib/cmd_test_script.py

bzrlib/cmdline.py

bzrlib/controldir.py

bzrlib/counted_lock.py

bzrlib/crash.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc_generate/builders

bzrlib/doc_generate/builders/__init__.py

bzrlib/doc_generate/builders/texinfo.py

bzrlib/doc_generate/conf.py

bzrlib/doc_generate/writers

bzrlib/doc_generate/writers/__init__.py

bzrlib/doc_generate/writers/texinfo.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflict-types.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/location-alias.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/help_topics/es

bzrlib/help_topics/es/conflict-types.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory_delta.py

bzrlib/library_state.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/merge_directive.py

bzrlib/mergetools.py

bzrlib/multiparent.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/bash_completion

bzrlib/plugins/bash_completion/README.txt

bzrlib/plugins/bash_completion/__init__.py

bzrlib/plugins/bash_completion/bashcomp.py

bzrlib/plugins/bash_completion/tests

bzrlib/plugins/bash_completion/tests/__init__.py

bzrlib/plugins/bash_completion/tests/test_bashcomp.py

bzrlib/plugins/changelog_merge

bzrlib/plugins/changelog_merge/__init__.py

bzrlib/plugins/changelog_merge/changelog_merge.py

bzrlib/plugins/changelog_merge/tests

bzrlib/plugins/changelog_merge/tests/__init__.py

bzrlib/plugins/changelog_merge/tests/test_changelog_merge.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_api.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_api.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/plugins/news_merge

bzrlib/plugins/news_merge/README

bzrlib/plugins/news_merge/__init__.py

bzrlib/plugins/news_merge/news_merge.py

bzrlib/plugins/news_merge/parser.py

bzrlib/plugins/news_merge/tests

bzrlib/plugins/news_merge/tests/__init__.py

bzrlib/plugins/news_merge/tests/test_news_merge.py

bzrlib/plugins/weave_fmt

bzrlib/plugins/weave_fmt/__init__.py

bzrlib/plugins/weave_fmt/branch.py

bzrlib/plugins/weave_fmt/bzrdir.py

bzrlib/plugins/weave_fmt/repository.py

bzrlib/plugins/weave_fmt/test_bzrdir.py

bzrlib/plugins/weave_fmt/test_repository.py

bzrlib/plugins/weave_fmt/test_workingtree.py

bzrlib/plugins/weave_fmt/tests

bzrlib/plugins/weave_fmt/workingtree.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/pyutils.py

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/recordcounter.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitpack_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/static_tuple.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/tag.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_config.py

bzrlib/tests/blackbox/test_deleted.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_lookup_revision.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_repair_workingtree.py

bzrlib/tests/blackbox/test_resolve.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/blackbox/test_script.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_revert.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/doc_generate

bzrlib/tests/doc_generate/__init__.py

bzrlib/tests/doc_generate/builders

bzrlib/tests/doc_generate/builders/__init__.py

bzrlib/tests/doc_generate/builders/test_texinfo.py

bzrlib/tests/doc_generate/writers

bzrlib/tests/doc_generate/writers/__init__.py

bzrlib/tests/doc_generate/writers/test_texinfo.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/file_utils.py

bzrlib/tests/fixtures.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/matchers.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_config.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_bzrdir

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_controldir/test_format.py

bzrlib/tests/per_controldir/test_push.py

bzrlib/tests/per_controldir_colo

bzrlib/tests/per_controldir_colo/__init__.py

bzrlib/tests/per_controldir_colo/test_supported.py

bzrlib/tests/per_controldir_colo/test_unsupported.py

bzrlib/tests/per_foreign_vcs

bzrlib/tests/per_foreign_vcs/__init__.py

bzrlib/tests/per_foreign_vcs/test_branch.py

bzrlib/tests/per_foreign_vcs/test_repository.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_copy_content_into.py

bzrlib/tests/per_interbranch/test_fetch.py

bzrlib/tests/per_interbranch/test_get.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_inventory

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_merger.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_commit_with_stacking.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_graph.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_repository_vf

bzrlib/tests/per_repository_vf/__init__.py

bzrlib/tests/per_repository_vf/helpers.py

bzrlib/tests/per_repository_vf/test_check.py

bzrlib/tests/per_repository_vf/test_check_reconcile.py

bzrlib/tests/per_repository_vf/test_reconcile.py

bzrlib/tests/per_repository_vf/test_repository.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_is_executable.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_locking.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_uifactory

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_check_state.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_symlinks.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/scenarios.py

bzrlib/tests/script.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__bencode.py

bzrlib/tests/test__btree_serializer.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test__static_tuple.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_cethread.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_cmdline.py

bzrlib/tests/test_controldir.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_fixtures.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_library_state.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_matchers.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_mergetools.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patches_data/binary-after-normal.patch

bzrlib/tests/test_patches_data/binary.patch

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_pyutils.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_scenarios.py

bzrlib/tests/test_script.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_server.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_test_server.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_treeshape.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/testui.py

bzrlib/tests/transport_util.py

bzrlib/timestamp.py

bzrlib/transport/brokenrename.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/gio_transport.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/pathfilter.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/util/_bencode_py.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/views.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzr

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/debian

contrib/debian/default

contrib/debian/init.d

contrib/zsh/README

doc/Bazaar-Logo-For-Manuals.png

doc/developers

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr-doc.css

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/conf.py

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/documenting-changes.txt

doc/developers/ec2.txt

doc/developers/fetch.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/implementation-notes.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/miscellaneous-notes.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plans.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/principles.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/specifications.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/transports.txt

doc/developers/ui.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/developers/win32_build_setup.txt

doc/developers/xdg_config_spec.txt

doc/en

doc/en/Makefile

doc/en/_static

doc/en/_static/bzr icon 16.png

doc/en/_static/bzr.ico

doc/en/_static/en

doc/en/_static/en/Makefile

doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/_static/en/bzr-en-quick-reference.png

doc/en/_static/en/bzr-en-quick-reference.svg

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/admin-guide

doc/en/admin-guide/advanced.txt

doc/en/admin-guide/backup.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/index-plain.txt

doc/en/admin-guide/index.txt

doc/en/admin-guide/integration.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/security.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/release-notes/bzr-0.1.txt

doc/en/release-notes/bzr-0.10.txt

doc/en/release-notes/bzr-0.11.txt

doc/en/release-notes/bzr-0.12.txt

doc/en/release-notes/bzr-0.13.txt

doc/en/release-notes/bzr-0.14.txt

doc/en/release-notes/bzr-0.15.txt

doc/en/release-notes/bzr-0.16.txt

doc/en/release-notes/bzr-0.17.txt

doc/en/release-notes/bzr-0.18.txt

doc/en/release-notes/bzr-0.6.txt

doc/en/release-notes/bzr-0.7.txt

doc/en/release-notes/bzr-0.8.txt

doc/en/release-notes/bzr-0.9.txt

doc/en/release-notes/bzr-0.90.txt

doc/en/release-notes/bzr-0.91.txt

doc/en/release-notes/bzr-0.92.txt

doc/en/release-notes/bzr-1.0.txt

doc/en/release-notes/bzr-1.1.txt

doc/en/release-notes/bzr-1.10.txt

doc/en/release-notes/bzr-1.11.txt

doc/en/release-notes/bzr-1.12.txt

doc/en/release-notes/bzr-1.13.txt

doc/en/release-notes/bzr-1.14.txt

doc/en/release-notes/bzr-1.15.txt

doc/en/release-notes/bzr-1.16.txt

doc/en/release-notes/bzr-1.17.txt

doc/en/release-notes/bzr-1.18.txt

doc/en/release-notes/bzr-1.2.txt

doc/en/release-notes/bzr-1.3.txt

doc/en/release-notes/bzr-1.4.txt

doc/en/release-notes/bzr-1.5.txt

doc/en/release-notes/bzr-1.6.txt

doc/en/release-notes/bzr-1.7.txt

doc/en/release-notes/bzr-1.8.txt

doc/en/release-notes/bzr-1.9.txt

doc/en/release-notes/bzr-2.0.txt

doc/en/release-notes/bzr-2.1.txt

doc/en/release-notes/bzr-2.2.txt

doc/en/release-notes/bzr-2.4.txt

doc/en/release-notes/release-template.txt

doc/en/release-notes/series-template.txt

doc/en/tutorials

doc/en/tutorials/index.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/en/whats-new

doc/en/whats-new/whats-new-in-2.1.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/en/whats-new/whats-new-in-2.3.txt

doc/en/whats-new/whats-new-in-2.4.txt

doc/es

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_static/es/Makefile

doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/_static/es/bzr-es-quick-reference.png

doc/es/_static/es/bzr-es-quick-reference.svg

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/index.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/quick-reference

doc/es/quick-reference/index.txt

doc/es/user-guide

doc/es/user-guide/index-plain.txt

doc/es/user-guide/index.txt

doc/es/user-guide/version_info.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/index.txt

doc/ja

doc/ja/_static

doc/ja/_static/bzr icon 16.png

doc/ja/_static/bzr.ico

doc/ja/_templates

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/index.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide

doc/ja/upgrade-guide/data_migration.txt

doc/ja/upgrade-guide/index.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/upgrade-guide/tips_and_tricks.txt

doc/ja/user-guide

doc/ja/user-guide/adv_merging.txt

doc/ja/user-guide/annotating_changes.txt

doc/ja/user-guide/bazaar_workflows.txt

doc/ja/user-guide/branching_a_project.txt

doc/ja/user-guide/browsing_history.txt

doc/ja/user-guide/bug_trackers.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/central_intro.txt

doc/ja/user-guide/configuring_bazaar.txt

doc/ja/user-guide/controlling_registration.txt

doc/ja/user-guide/core_concepts.txt

doc/ja/user-guide/distributed_intro.txt

doc/ja/user-guide/entering_commands.txt

doc/ja/user-guide/filtered_views.txt

doc/ja/user-guide/getting_help.txt

doc/ja/user-guide/hooks.txt

doc/ja/user-guide/http_smart_server.txt

doc/ja/user-guide/images

doc/ja/user-guide/images/workflows_centralized.png

doc/ja/user-guide/images/workflows_centralized.svg

doc/ja/user-guide/images/workflows_gatekeeper.png

doc/ja/user-guide/images/workflows_gatekeeper.svg

doc/ja/user-guide/images/workflows_localcommit.png

doc/ja/user-guide/images/workflows_localcommit.svg

doc/ja/user-guide/images/workflows_peer.png

doc/ja/user-guide/images/workflows_peer.svg

doc/ja/user-guide/images/workflows_pqm.png

doc/ja/user-guide/images/workflows_pqm.svg

doc/ja/user-guide/images/workflows_shared.png

doc/ja/user-guide/images/workflows_shared.svg

doc/ja/user-guide/images/workflows_single.png

doc/ja/user-guide/images/workflows_single.svg

doc/ja/user-guide/index.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/merging_changes.txt

doc/ja/user-guide/organizing_branches.txt

doc/ja/user-guide/organizing_your_workspace.txt

doc/ja/user-guide/part2_intro.txt

doc/ja/user-guide/partner_intro.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/publishing_a_branch.txt

doc/ja/user-guide/recording_changes.txt

doc/ja/user-guide/releasing_a_project.txt

doc/ja/user-guide/resolving_conflicts.txt

doc/ja/user-guide/reusing_a_checkout.txt

doc/ja/user-guide/reviewing_changes.txt

doc/ja/user-guide/sending_changes.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/setting_up_email.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/shelving_changes.txt

doc/ja/user-guide/solo_intro.txt

doc/ja/user-guide/specifying_revisions.txt

doc/ja/user-guide/stacked.txt

doc/ja/user-guide/starting_a_project.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/undoing_mistakes.txt

doc/ja/user-guide/using_aliases.txt

doc/ja/user-guide/using_checkouts.txt

doc/ja/user-guide/using_gatekeepers.txt

doc/ja/user-guide/version_info.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-guide/working_offline_central.txt

doc/ja/user-guide/writing_a_plugin.txt

doc/ja/user-guide/zen.txt

doc/ja/user-reference

doc/ja/user-reference/index.txt

doc/news-template.txt

doc/ru

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_static/ru/Makefile

doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/index.txt

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/index.txt

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index-plain.txt

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/fixed-in.py

tools/generate_release_notes.py

tools/package_docs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-control.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/py2exe_boot_common.py

tools/win32/run_script.py

files removed:
NEWS.developers

build-api

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/textui.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/weave_commands.py

bzrlib/xml6.py

contrib/add-bzr-to-baz

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/zsh/_bzr

doc/README.1st

doc/bazaar-vcs.org.kid

doc/index.txt

tools/biobench.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/trace-revisions

tools/weavebench.py

files renamed:
bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

bzrlib/graph.py => bzrlib/deprecated_graph.py

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/help_topics.py => bzrlib/help_topics/__init__.py

doc/configuration.txt => bzrlib/help_topics/en/configuration.txt

bzrlib/xml4.py => bzrlib/plugins/weave_fmt/xml4.py

bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_send.py

bzrlib/tests/HttpServer.py => bzrlib/tests/http_server.py

bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_controldir/

bzrlib/tests/bzrdir_implementations/test_bzrdir.py => bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/tests/test_revisionspec.py

bzrlib/transport/ftp.py => bzrlib/transport/ftp/__init__.py

bzrlib/transport/smart.py => bzrlib/transport/remote.py

bzrlib/win32console.py => bzrlib/win32utils.py

bzrlib/xml5.py => bzrlib/xml8.py

HACKING => doc/developers/HACKING.txt

NEWS => doc/en/release-notes/bzr-2.3.txt

doc/centralized_workflow.txt => doc/en/tutorials/centralized_workflow.txt

doc/tutorial.txt => doc/en/tutorials/tutorial.txt

doc/http_smart_server.txt => doc/en/user-guide/http_smart_server.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/server.txt => doc/en/user-guide/server.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

doc/version_info.txt => doc/en/user-guide/version_info.txt

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

BRANCH.TODO

INSTALL

Makefile

README

TODO

bzr.ico

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_controldir/__init__.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml_serializer.py

doc/default.css

profile_imports.py

setup.py *

tools/capture_tree.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit versionedfile implementation.

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

end-marker; simply "end VERSION"

delta can be line or full contents.a

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

from itertools import izip

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

import gzip

from bzrlib import (

debug,

diff,

graph as _mod_graph,

index as _mod_index,

pack,

patiencediff,

static_tuple,

trace,

tsort,

tuned_gzip,

ui,

)

from bzrlib.repofmt import pack_repo

""")

from bzrlib import (

annotate,

errors,

patiencediff,

progress,

)

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

import bzrlib.ui

import bzrlib.weave

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

osutils,

)

from bzrlib.errors import (

NoSuchFile,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

SHA1KnitCorrupt,

)

from bzrlib.osutils import (

contains_whitespace,

sha_string,

sha_strings,

split_lines,

)

from bzrlib.versionedfile import (

100

_KeyRefs,

101

AbsentContentFactory,

102

adapter_registry,

103

ConstantMapper,

104

ContentFactory,

105

sort_groupcompress,

106

VersionedFiles,

107

)

108

109

110

# TODO: Split out code specific to this format into an associated object.

109

122

110

123

DATA_SUFFIX = '.knit'

111

124

INDEX_SUFFIX = '.kndx'

125

_STREAM_MIN_BUFFER_SIZE = 5*1024*1024

126

127

128

class KnitAdapter(object):

129

"""Base class for knit record adaption."""

130

131

def __init__(self, basis_vf):

132

"""Create an adapter which accesses full texts from basis_vf.

133

134

:param basis_vf: A versioned file to access basis texts of deltas from.

135

May be None for adapters that do not need to access basis texts.

136

"""

137

self._data = KnitVersionedFiles(None, None)

138

self._annotate_factory = KnitAnnotateFactory()

139

self._plain_factory = KnitPlainFactory()

140

self._basis_vf = basis_vf

141

142

143

class FTAnnotatedToUnannotated(KnitAdapter):

144

"""An adapter from FT annotated knits to unannotated ones."""

145

146

def get_bytes(self, factory):

147

annotated_compressed_bytes = factory._raw_record

148

rec, contents = \

149

self._data._parse_record_unchecked(annotated_compressed_bytes)

150

content = self._annotate_factory.parse_fulltext(contents, rec[1])

151

size, bytes = self._data._record_to_data((rec[1],), rec[3], content.text())

152

return bytes

153

154

155

class DeltaAnnotatedToUnannotated(KnitAdapter):

156

"""An adapter for deltas from annotated to unannotated."""

157

158

def get_bytes(self, factory):

159

annotated_compressed_bytes = factory._raw_record

160

rec, contents = \

161

self._data._parse_record_unchecked(annotated_compressed_bytes)

162

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

163

plain=True)

164

contents = self._plain_factory.lower_line_delta(delta)

165

size, bytes = self._data._record_to_data((rec[1],), rec[3], contents)

166

return bytes

167

168

169

class FTAnnotatedToFullText(KnitAdapter):

170

"""An adapter from FT annotated knits to unannotated ones."""

171

172

def get_bytes(self, factory):

173

annotated_compressed_bytes = factory._raw_record

174

rec, contents = \

175

self._data._parse_record_unchecked(annotated_compressed_bytes)

176

content, delta = self._annotate_factory.parse_record(factory.key[-1],

177

contents, factory._build_details, None)

178

return ''.join(content.text())

179

180

181

class DeltaAnnotatedToFullText(KnitAdapter):

182

"""An adapter for deltas from annotated to unannotated."""

183

184

def get_bytes(self, factory):

185

annotated_compressed_bytes = factory._raw_record

186

rec, contents = \

187

self._data._parse_record_unchecked(annotated_compressed_bytes)

188

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

189

plain=True)

190

compression_parent = factory.parents[0]

191

basis_entry = self._basis_vf.get_record_stream(

192

[compression_parent], 'unordered', True).next()

193

if basis_entry.storage_kind == 'absent':

194

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

195

basis_chunks = basis_entry.get_bytes_as('chunked')

196

basis_lines = osutils.chunks_to_lines(basis_chunks)

197

# Manually apply the delta because we have one annotated content and

198

# one plain.

199

basis_content = PlainKnitContent(basis_lines, compression_parent)

200

basis_content.apply_delta(delta, rec[1])

201

basis_content._should_strip_eol = factory._build_details[1]

202

return ''.join(basis_content.text())

203

204

205

class FTPlainToFullText(KnitAdapter):

206

"""An adapter from FT plain knits to unannotated ones."""

207

208

def get_bytes(self, factory):

209

compressed_bytes = factory._raw_record

210

rec, contents = \

211

self._data._parse_record_unchecked(compressed_bytes)

212

content, delta = self._plain_factory.parse_record(factory.key[-1],

213

contents, factory._build_details, None)

214

return ''.join(content.text())

215

216

217

class DeltaPlainToFullText(KnitAdapter):

218

"""An adapter for deltas from annotated to unannotated."""

219

220

def get_bytes(self, factory):

221

compressed_bytes = factory._raw_record

222

rec, contents = \

223

self._data._parse_record_unchecked(compressed_bytes)

224

delta = self._plain_factory.parse_line_delta(contents, rec[1])

225

compression_parent = factory.parents[0]

226

# XXX: string splitting overhead.

227

basis_entry = self._basis_vf.get_record_stream(

228

[compression_parent], 'unordered', True).next()

229

if basis_entry.storage_kind == 'absent':

230

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

231

basis_chunks = basis_entry.get_bytes_as('chunked')

232

basis_lines = osutils.chunks_to_lines(basis_chunks)

233

basis_content = PlainKnitContent(basis_lines, compression_parent)

234

# Manually apply the delta because we have one annotated content and

235

# one plain.

236

content, _ = self._plain_factory.parse_record(rec[1], contents,

237

factory._build_details, basis_content)

238

return ''.join(content.text())

239

240

241

class KnitContentFactory(ContentFactory):

242

"""Content factory for streaming from knits.

243

244

:seealso ContentFactory:

245

"""

246

247

def __init__(self, key, parents, build_details, sha1, raw_record,

248

annotated, knit=None, network_bytes=None):

249

"""Create a KnitContentFactory for key.

250

251

:param key: The key.

252

:param parents: The parents.

253

:param build_details: The build details as returned from

254

get_build_details.

255

:param sha1: The sha1 expected from the full text of this object.

256

:param raw_record: The bytes of the knit data from disk.

257

:param annotated: True if the raw data is annotated.

258

:param network_bytes: None to calculate the network bytes on demand,

259

not-none if they are already known.

260

"""

261

ContentFactory.__init__(self)

262

self.sha1 = sha1

263

self.key = key

264

self.parents = parents

265

if build_details[0] == 'line-delta':

266

kind = 'delta'

267

else:

268

kind = 'ft'

269

if annotated:

270

annotated_kind = 'annotated-'

271

else:

272

annotated_kind = ''

273

self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)

274

self._raw_record = raw_record

275

self._network_bytes = network_bytes

276

self._build_details = build_details

277

self._knit = knit

278

279

def _create_network_bytes(self):

280

"""Create a fully serialised network version for transmission."""

281

# storage_kind, key, parents, Noeol, raw_record

282

key_bytes = '\x00'.join(self.key)

283

if self.parents is None:

284

parent_bytes = 'None:'

285

else:

286

parent_bytes = '\t'.join('\x00'.join(key) for key in self.parents)

287

if self._build_details[1]:

288

noeol = 'N'

289

else:

290

noeol = ' '

291

network_bytes = "%s\n%s\n%s\n%s%s" % (self.storage_kind, key_bytes,

292

parent_bytes, noeol, self._raw_record)

293

self._network_bytes = network_bytes

294

295

def get_bytes_as(self, storage_kind):

296

if storage_kind == self.storage_kind:

297

if self._network_bytes is None:

298

self._create_network_bytes()

299

return self._network_bytes

300

if ('-ft-' in self.storage_kind and

301

storage_kind in ('chunked', 'fulltext')):

302

adapter_key = (self.storage_kind, 'fulltext')

303

adapter_factory = adapter_registry.get(adapter_key)

304

adapter = adapter_factory(None)

305

bytes = adapter.get_bytes(self)

306

if storage_kind == 'chunked':

307

return [bytes]

308

else:

309

return bytes

310

if self._knit is not None:

311

# Not redundant with direct conversion above - that only handles

312

# fulltext cases.

313

if storage_kind == 'chunked':

314

return self._knit.get_lines(self.key[0])

315

elif storage_kind == 'fulltext':

316

return self._knit.get_text(self.key[0])

317

raise errors.UnavailableRepresentation(self.key, storage_kind,

318

self.storage_kind)

319

320

321

class LazyKnitContentFactory(ContentFactory):

322

"""A ContentFactory which can either generate full text or a wire form.

323

324

:seealso ContentFactory:

325

"""

326

327

def __init__(self, key, parents, generator, first):

328

"""Create a LazyKnitContentFactory.

329

330

:param key: The key of the record.

331

:param parents: The parents of the record.

332

:param generator: A _ContentMapGenerator containing the record for this

333

key.

334

:param first: Is this the first content object returned from generator?

335

if it is, its storage kind is knit-delta-closure, otherwise it is

336

knit-delta-closure-ref

337

"""

338

self.key = key

339

self.parents = parents

340

self.sha1 = None

341

self._generator = generator

342

self.storage_kind = "knit-delta-closure"

343

if not first:

344

self.storage_kind = self.storage_kind + "-ref"

345

self._first = first

346

347

def get_bytes_as(self, storage_kind):

348

if storage_kind == self.storage_kind:

349

if self._first:

350

return self._generator._wire_bytes()

351

else:

352

# all the keys etc are contained in the bytes returned in the

353

# first record.

354

return ''

355

if storage_kind in ('chunked', 'fulltext'):

356

chunks = self._generator._get_one_work(self.key).text()

357

if storage_kind == 'chunked':

358

return chunks

359

else:

360

return ''.join(chunks)

361

raise errors.UnavailableRepresentation(self.key, storage_kind,

362

self.storage_kind)

363

364

365

def knit_delta_closure_to_records(storage_kind, bytes, line_end):

366

"""Convert a network record to a iterator over stream records.

367

368

:param storage_kind: The storage kind of the record.

369

Must be 'knit-delta-closure'.

370

:param bytes: The bytes of the record on the network.

371

"""

372

generator = _NetworkContentMapGenerator(bytes, line_end)

373

return generator.get_record_stream()

374

375

376

def knit_network_to_record(storage_kind, bytes, line_end):

377

"""Convert a network record to a record object.

378

379

:param storage_kind: The storage kind of the record.

380

:param bytes: The bytes of the record on the network.

381

"""

382

start = line_end

383

line_end = bytes.find('\n', start)

384

key = tuple(bytes[start:line_end].split('\x00'))

385

start = line_end + 1

386

line_end = bytes.find('\n', start)

387

parent_line = bytes[start:line_end]

388

if parent_line == 'None:':

389

parents = None

390

else:

391

parents = tuple(

392

[tuple(segment.split('\x00')) for segment in parent_line.split('\t')

393

if segment])

394

start = line_end + 1

395

noeol = bytes[start] == 'N'

396

if 'ft' in storage_kind:

397

method = 'fulltext'

398

else:

399

method = 'line-delta'

400

build_details = (method, noeol)

401

start = start + 1

402

raw_record = bytes[start:]

403

annotated = 'annotated' in storage_kind

404

return [KnitContentFactory(key, parents, build_details, None, raw_record,

405

annotated, network_bytes=bytes)]

112

406

113

407

114

408

class KnitContent(object):

115

"""Content of a knit version to which deltas can be applied."""

116

117

def __init__(self, lines):

118

self._lines = lines

119

120

def annotate_iter(self):

121

"""Yield tuples of (origin, text) for each content line."""

122

return iter(self._lines)

123

124

def annotate(self):

125

"""Return a list of (origin, text) tuples."""

126

return list(self.annotate_iter())

409

"""Content of a knit version to which deltas can be applied.

410

411

This is always stored in memory as a list of lines with \n at the end,

412

plus a flag saying if the final ending is really there or not, because that

413

corresponds to the on-disk knit representation.

414

"""

415

416

def __init__(self):

417

self._should_strip_eol = False

418

419

def apply_delta(self, delta, new_version_id):

420

"""Apply delta to this object to become new_version_id."""

421

raise NotImplementedError(self.apply_delta)

127

422

128

423

def line_delta_iter(self, new_lines):

129

424

"""Generate line-based delta from this content to new_lines."""

130

425

new_texts = new_lines.text()

131

426

old_texts = self.text()

132

s = KnitSequenceMatcher(None, old_texts, new_texts)

427

s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)

133

428

for tag, i1, i2, j1, j2 in s.get_opcodes():

134

429

if tag == 'equal':

135

430

continue

139

434

def line_delta(self, new_lines):

140

435

return list(self.line_delta_iter(new_lines))

141

436

142

def text(self):

143

return [text for origin, text in self._lines]

144

145

def copy(self):

146

return KnitContent(self._lines[:])

437

@staticmethod
def get_line_delta_blocks(knit_delta, source, target):
    """Extract SequenceMatcher.get_matching_blocks() from a knit delta.

    :param knit_delta: Iterable of (s_begin, s_end, t_len, new_text) hunks;
        new_text is not examined here.
    :param source: Indexable sequence of lines the delta is based on.
    :param target: Indexable sequence of lines the delta produces.
    :return: A generator of (source_pos, target_pos, length) tuples, always
        finishing with a zero-length block.
    """
    def checked_length(src_at, tgt_at, length):
        # knit deltas do not provide reliable info about whether the
        # last line of a file matches, due to eol handling, so the final
        # line of every candidate run is compared explicitly.
        final = length - 1
        if source[src_at + final] != target[tgt_at + final]:
            return final
        return length

    total = len(target)
    src_at = 0
    tgt_at = 0
    for hunk_begin, hunk_end, inserted, _new_text in knit_delta:
        # Lines between the previous hunk and this one are unchanged.
        gap = hunk_begin - src_at
        if gap > 0:
            matched = checked_length(src_at, tgt_at, gap)
            if matched > 0:
                yield src_at, tgt_at, matched
        tgt_at += inserted + gap
        src_at = hunk_end
    # Whatever remains of the target after the last hunk.
    tail = total - tgt_at
    if tail > 0:
        matched = checked_length(src_at, tgt_at, tail)
        if matched > 0:
            yield src_at, tgt_at, matched
    # Terminating zero-length block.
    yield src_at + (total - tgt_at), total, 0

462

463

464

class AnnotatedKnitContent(KnitContent):

465

"""Annotated content."""

466

467

def __init__(self, lines):

468

KnitContent.__init__(self)

469

self._lines = lines

470

471

def annotate(self):

472

"""Return a list of (origin, text) for each content line."""

473

lines = self._lines[:]

474

if self._should_strip_eol:

475

origin, last_line = lines[-1]

476

lines[-1] = (origin, last_line.rstrip('\n'))

477

return lines

478

479

def apply_delta(self, delta, new_version_id):

480

"""Apply delta to this object to become new_version_id."""

481

offset = 0

482

lines = self._lines

483

for start, end, count, delta_lines in delta:

484

lines[offset+start:offset+end] = delta_lines

485

offset = offset + (start - end) + count

486

487

def text(self):

488

try:

489

lines = [text for origin, text in self._lines]

490

except ValueError, e:

491

# most commonly (only?) caused by the internal form of the knit

492

# missing annotation information because of a bug - see thread

493

# around 20071015

494

raise KnitCorrupt(self,

495

"line in annotated knit missing annotation information: %s"

496

% (e,))

497

if self._should_strip_eol:

498

lines[-1] = lines[-1].rstrip('\n')

499

return lines

500

501

def copy(self):

502

return AnnotatedKnitContent(self._lines[:])

503

504

505

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is
    reported for all lines. Generally, annotate[_iter] is not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        KnitContent.__init__(self)
        self._lines = lines
        self._version_id = version_id

    def annotate(self):
        """Return a list of (origin, text) for each content line."""
        origin = self._version_id
        return [(origin, line) for line in self._lines]

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        content_lines = self._lines
        # Running adjustment for hunks already applied: each hunk changes
        # the list length by count - (end - start).
        shift = 0
        for start, end, count, replacement in delta:
            content_lines[shift + start:shift + end] = replacement
            shift = shift + (start - end) + count
        self._version_id = new_version_id

    def copy(self):
        """Return a new PlainKnitContent over a copy of the line list."""
        return PlainKnitContent(self._lines[:], self._version_id)

    def text(self):
        """Return the content lines.

        The stored list itself is returned unless the final newline has to
        be stripped, in which case a modified copy is returned.
        """
        if not self._should_strip_eol:
            return self._lines
        stripped = self._lines[:]
        stripped[-1] = stripped[-1].rstrip('\n')
        return stripped

147

540

148

541

149

542

class _KnitFactory(object):

150

"""Base factory for creating content objects."""

151

152

def make(self, lines, version):

153

num_lines = len(lines)

154

return KnitContent(zip([version] * num_lines, lines))

543

"""Base class for common Factory functions."""

544

545

def parse_record(self, version_id, record, record_details,
                 base_content, copy_base_content=True):
    """Parse a record into a full content object.

    :param version_id: The official version id for this content
    :param record: The data returned by read_records_iter()
    :param record_details: Details about the record returned by
        get_build_details; unpacked here as (method, noeol)
    :param base_content: If get_build_details returns a compression_parent,
        you must supply a base_content here, else use None
    :param copy_base_content: When building from the base_content, choose
        whether to copy it and return a new object (True), or to modify it
        in place (False).
    :return: (content, delta) A Content object and possibly a line-delta,
        delta may be None
    """
    method, noeol = record_details
    if method != 'line-delta':
        # Anything that is not a line-delta is parsed as a fulltext.
        content = self.parse_fulltext(record, version_id)
        delta = None
    else:
        content = base_content
        if copy_base_content:
            content = base_content.copy()
        delta = self.parse_line_delta(record, version_id)
        content.apply_delta(delta, version_id)
    content._should_strip_eol = noeol
    return (content, delta)

155

574

156

575

157

576

class KnitAnnotateFactory(_KnitFactory):

159

578

160

579

annotated = True

161

580

162

def parse_fulltext(self, content, version):

581

def make(self, lines, version_id):

582

num_lines = len(lines)

583

return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

584

585

def parse_fulltext(self, content, version_id):

163

586

"""Convert fulltext to internal representation

164

587

165

588

fulltext content is of the format

167

590

internal representation is of the format:

168

591

(revid, plaintext)

169

592

"""

170

decode_utf8 = cache_utf8.decode

171

lines = []

172

for line in content:

173

origin, text = line.split(' ', 1)

174

lines.append((decode_utf8(origin), text))

175

return KnitContent(lines)

593

# TODO: jam 20070209 The tests expect this to be returned as tuples,

594

# but the code itself doesn't really depend on that.

595

# Figure out a way to not require the overhead of turning the

596

# list back into tuples.

597

lines = [tuple(line.split(' ', 1)) for line in content]

598

return AnnotatedKnitContent(lines)

176

599

177

600

def parse_line_delta_iter(self, lines):

178

601

return iter(self.parse_line_delta(lines))

179

602

180

def parse_line_delta(self, lines, version):

603

def parse_line_delta(self, lines, version_id, plain=False):

181

604

"""Convert a line based delta into internal representation.

182

605

183

606

line delta is in the form of:

186

609

revid(utf8) newline\n

187

610

internal representation is

188

611

(start, end, count, [1..count tuples (revid, newline)])

612

613

:param plain: If True, the lines are returned as a plain

614

list without annotations, not as a list of (origin, content) tuples, i.e.

615

(start, end, count, [1..count newline])

189

616

"""

190

decode_utf8 = cache_utf8.decode

191

617

result = []

192

618

lines = iter(lines)

193

619

next = lines.next

620

621

cache = {}

622

def cache_and_return(line):

623

origin, text = line.split(' ', 1)

624

return cache.setdefault(origin, origin), text

625

194

626

# walk through the lines parsing.

627

# Note that the plain test is explicitly pulled out of the

628

# loop to minimise any performance impact

629

if plain:

630

for header in lines:

631

start, end, count = [int(n) for n in header.split(',')]

632

contents = [next().split(' ', 1)[1] for i in xrange(count)]

633

result.append((start, end, count, contents))

634

else:

635

for header in lines:

636

start, end, count = [int(n) for n in header.split(',')]

637

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

638

result.append((start, end, count, contents))

639

return result

640

641

def get_fulltext_content(self, lines):

642

"""Extract just the content lines from a fulltext."""

643

return (line.split(' ', 1)[1] for line in lines)

644

645

def get_linedelta_content(self, lines):

646

"""Extract just the content from a line delta.

647

648

This doesn't return all of the extra information stored in a delta.

649

Only the actual content lines.

650

"""

651

lines = iter(lines)

652

next = lines.next

195

653

for header in lines:

196

start, end, count = [int(n) for n in header.split(',')]

197

contents = []

198

remaining = count

199

while remaining:

654

header = header.split(',')

655

count = int(header[2])

656

for i in xrange(count):

200

657

origin, text = next().split(' ', 1)

201

remaining -= 1

202

contents.append((decode_utf8(origin), text))

203

result.append((start, end, count, contents))

204

return result

658

yield text

205

659

206

660

def lower_fulltext(self, content):

207

661

"""convert a fulltext content record into a serializable form.

208

662

209

663

see parse_fulltext which this inverts.

210

664

"""

211

encode_utf8 = cache_utf8.encode

212

return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

665

return ['%s %s' % (o, t) for o, t in content._lines]

213

666

214

667

def lower_line_delta(self, delta):

215

668

"""convert a delta into a serializable form.

216

669

217

670

See parse_line_delta which this inverts.

218

671

"""

219

encode_utf8 = cache_utf8.encode

672

# TODO: jam 20070209 We only do the caching thing to make sure that

673

# the origin is a valid utf-8 line, eventually we could remove it

220

674

out = []

221

675

for start, end, c, lines in delta:

222

676

out.append('%d,%d,%d\n' % (start, end, c))

223

out.extend(encode_utf8(origin) + ' ' + text

677

out.extend(origin + ' ' + text

224

678

for origin, text in lines)

225

679

return out

226

680

681

def annotate(self, knit, key):
    """Return the annotated lines of key as stored in knit.

    :param knit: Object providing _get_content(key).
    :param key: The key to annotate. For tuple keys every origin is
        expanded to a full key by prepending key[:-1].
    :return: A list of (origin, line) pairs.
    """
    annotations = knit._get_content(key).annotate()
    if type(key) is not tuple:
        # XXX: This smells a bit. Why would key ever be a non-tuple here?
        # Aren't keys defined to be tuples? -- spiv 20080618
        return annotations
    # adjust for the fact that serialised annotations are only key suffixes
    # for this factory.
    prefix = key[:-1]
    return [(prefix + (origin,), line) for origin, line in annotations]

696

227

697

228

698

class KnitPlainFactory(_KnitFactory):

229

699

"""Factory for creating plain Content objects."""

230

700

231

701

annotated = False

232

702

233

def parse_fulltext(self, content, version):

703

def make(self, lines, version_id):

704

return PlainKnitContent(lines, version_id)

705

706

def parse_fulltext(self, content, version_id):

234

707

"""This parses an unannotated fulltext.

235

708

236

709

Note that this is not a noop - the internal representation

237

710

has (versionid, line) - it's just a constant versionid.

238

711

"""

239

return self.make(content, version)

712

return self.make(content, version_id)

240

713

241

def parse_line_delta_iter(self, lines, version):

714

def parse_line_delta_iter(self, lines, version_id):

242

715

cur = 0

243

716

num_lines = len(lines)

244

717

while cur < num_lines:

245

718

header = lines[cur]

246

719

cur += 1

247

720

start, end, c = [int(n) for n in header.split(',')]

248

yield start, end, c, zip([version] * c, lines[cur:cur+c])

721

yield start, end, c, lines[cur:cur+c]

249

722

cur += c

250

723

251

def parse_line_delta(self, lines, version):

252

return list(self.parse_line_delta_iter(lines, version))

724

def parse_line_delta(self, lines, version_id):

725

return list(self.parse_line_delta_iter(lines, version_id))

726

727

def get_fulltext_content(self, lines):

728

"""Extract just the content lines from a fulltext."""

729

return iter(lines)

730

731

def get_linedelta_content(self, lines):

732

"""Extract just the content from a line delta.

733

734

This doesn't return all of the extra information stored in a delta.

735

Only the actual content lines.

736

"""

737

lines = iter(lines)

738

next = lines.next

739

for header in lines:

740

header = header.split(',')

741

count = int(header[2])

742

for i in xrange(count):

743

yield next()

253

744

254

745

def lower_fulltext(self, content):

255

746

return content.text()

258

749

out = []

259

750

for start, end, c, lines in delta:

260

751

out.append('%d,%d,%d\n' % (start, end, c))

261

out.extend([text for origin, text in lines])

752

out.extend(lines)

262

753

return out

263

754

264

265

def make_empty_knit(transport, relpath):

266

"""Construct an empty knit at the specified location."""

267

k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

268

k._data._open_file()

269

270

271

class KnitVersionedFile(VersionedFile):

272

"""Weave-like structure with faster random access.

273

274

A knit stores a number of texts and a summary of the relationships

275

between them. Texts are identified by a string version-id. Texts

276

are normally stored and retrieved as a series of lines, but can

277

also be passed as single strings.

278

279

Lines are stored with the trailing newline (if any) included, to

280

avoid special cases for files with no final newline. Lines are

281

composed of 8-bit characters, not unicode. The combination of

282

these approaches should mean any 'binary' file can be safely

283

stored and retrieved.

284

"""

285

286

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

287

factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,

288

create=False, create_parent_dir=False, delay_create=False,

289

dir_mode=None):

290

"""Construct a knit at location specified by relpath.

291

292

:param create: If not True, only open an existing knit.

293

:param create_parent_dir: If True, create the parent directory if

294

creating the file fails. (This is used for stores with

295

hash-prefixes that may not exist yet)

296

:param delay_create: The calling code is aware that the knit won't

297

actually be created until the first data is stored.

755

def annotate(self, knit, key):

756

annotator = _KnitAnnotator(knit)

757

return annotator.annotate_flat(key)

758

759

760

761

def make_file_factory(annotated, mapper):
    """Create a factory for creating a file based KnitVersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param annotated: knit annotations are wanted.
    :param mapper: The mapper from keys to paths.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles built on a _KndxIndex and a _KnitKeyAccess.
    """
    def factory(transport):
        kndx_index = _KndxIndex(
            transport, mapper, lambda: None, lambda: True, lambda: True)
        key_access = _KnitKeyAccess(transport, mapper)
        return KnitVersionedFiles(kndx_index, key_access,
            annotated=annotated)
    return factory

775

776

777

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based VersionedFiles.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        KnitVersionedFiles which also carries the open write stream and
        container writer as its 'stream' and 'writer' attributes.
    """
    def factory(transport):
        parents = graph or delta
        # One reference list for the graph, one more for compression
        # parents.
        ref_length = 0
        max_delta_chain = 0
        if graph:
            ref_length += 1
        if delta:
            ref_length += 1
            max_delta_chain = 200
        graph_index = _mod_index.InMemoryGraphIndex(
            reference_lists=ref_length, key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _KnitGraphIndex(graph_index, lambda: True, parents=parents,
            deltas=delta, add_callback=graph_index.add_nodes)
        access = pack_repo._DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = KnitVersionedFiles(index, access,
            max_delta_chain=max_delta_chain)
        # Kept on the result so that cleanup_pack_knit can finish them.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

812

813

814

def cleanup_pack_knit(versioned_files):
    """Close the stream and end the writer attached to versioned_files.

    :param versioned_files: An object carrying 'stream' and 'writer'
        attributes, such as the result of a make_pack_factory() factory.
    """
    # NOTE(review): the stream is closed before the writer is ended; that
    # order is kept exactly as it was.
    pack_stream = versioned_files.stream
    pack_stream.close()
    container_writer = versioned_files.writer
    container_writer.end()

817

818

819

def _get_total_build_size(self, keys, positions):
    """Determine the total bytes to build these keys.

    (helper function because _KnitGraphIndex and _KndxIndex work the same, but
    don't inherit from a common base.)

    :param keys: Keys that we want to build
    :param positions: dict of {key: (info, index_memo, comp_parent)} (such
        as returned by _get_components_positions)
    :return: Number of bytes to build those keys; each record reached
        through the compression parents is counted once.
    """
    all_build_index_memos = {}
    build_keys = keys
    # Walk the compression chains one generation at a time.
    while build_keys:
        next_keys = set()
        for key in build_keys:
            # This is mostly for the 'stacked' case
            # Where we will be getting the data from a fallback
            if key not in positions:
                continue
            _, index_memo, compression_parent = positions[key]
            all_build_index_memos[key] = index_memo
            if compression_parent not in all_build_index_memos:
                next_keys.add(compression_parent)
        build_keys = next_keys
    # The third element of each index_memo is summed as the record size.
    # values() rather than the Python 2 only itervalues(), so the helper
    # behaves the same on any interpreter.
    return sum(index_memo[2]
               for index_memo in all_build_index_memos.values())

846

847

848

class KnitVersionedFiles(VersionedFiles):

849

"""Storage for many versioned files using knit compression.

850

851

Backend storage is managed by indices and data objects.

852

853

:ivar _index: A _KnitGraphIndex or similar that can describe the

854

parents, graph, compression and data location of entries in this

855

KnitVersionedFiles. Note that this is only the index for

856

*this* vfs; if there are fallbacks they must be queried separately.

857

"""

858

859

def __init__(self, index, data_access, max_delta_chain=200,

860

annotated=False, reload_func=None):

861

"""Create a KnitVersionedFiles with index and data_access.

862

863

:param index: The index for the knit data.

864

:param data_access: The access object to store and retrieve knit

865

records.

866

:param max_delta_chain: The maximum number of deltas to permit during

867

insertion. Set to 0 to prohibit the use of deltas.

868

:param annotated: Set to True to cause annotations to be calculated and

869

stored during insertion.

870

:param reload_func: A function that can be called if we think we need

871

to reload the pack listing and try again. See

872

'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.

298

873

"""

299

if deprecated_passed(basis_knit):

300

warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"

301

" deprecated as of bzr 0.9.",

302

DeprecationWarning, stacklevel=2)

303

if access_mode is None:

304

access_mode = 'w'

305

super(KnitVersionedFile, self).__init__(access_mode)

306

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

307

self.transport = transport

308

self.filename = relpath

309

self.factory = factory or KnitAnnotateFactory()

310

self.writable = (access_mode == 'w')

311

self.delta = delta

312

313

self._max_delta_chain = 200

314

315

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

316

access_mode, create=create, file_mode=file_mode,

317

create_parent_dir=create_parent_dir, delay_create=delay_create,

318

dir_mode=dir_mode)

319

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

320

access_mode, create=create and not len(self), file_mode=file_mode,

321

create_parent_dir=create_parent_dir, delay_create=delay_create,

322

dir_mode=dir_mode)

874

self._index = index

875

self._access = data_access

876

self._max_delta_chain = max_delta_chain

877

if annotated:

878

self._factory = KnitAnnotateFactory()

879

else:

880

self._factory = KnitPlainFactory()

881

self._immediate_fallback_vfs = []

882

self._reload_func = reload_func

323

883

324

884

def __repr__(self):

325

return '%s(%s)' % (self.__class__.__name__,

326

self.transport.abspath(self.filename))

327

328

def _check_should_delta(self, first_parents):

885

return "%s(%r, %r)" % (

886

self.__class__.__name__,

887

self._index,

888

self._access)

889

890

def add_fallback_versioned_files(self, a_versioned_files):

891

"""Add a source of texts for texts not present in this knit.

892

893

:param a_versioned_files: A VersionedFiles object.

894

"""

895

self._immediate_fallback_vfs.append(a_versioned_files)

896

897

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """See VersionedFiles.add_lines().

    Checks that the index is writable and that key and lines are safe to
    add, then hands over to _add() together with the joined bytes of lines.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so we give them
    # an empty tuple instead.
    if parents is None:
        parents = ()
    joined = ''.join(lines)
    return self._add(
        key, lines, parents, parent_texts, left_matching_blocks,
        nostore_sha, random_id, line_bytes=joined)

912

913

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :raises errors.BzrBadParameterUnicode: if text is not exactly a str.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so we give them
    # an empty tuple instead.
    if parents is None:
        parents = ()
    # No pre-split lines, parent texts or matching blocks are available on
    # this route; only the whole text is handed on.
    return self._add(
        key, None, parents, None, None, nostore_sha, random_id,
        line_bytes=text)

927

928

def _add(self, key, lines, parents, parent_texts,

929

left_matching_blocks, nostore_sha, random_id,

930

line_bytes):

931

"""Add a set of lines on top of version specified by parents.

932

933

Any versions not present will be converted into ghosts.

934

935

:param lines: A list of strings where each one is a single line (has a

936

single newline at the end of the string) This is now optional

937

(callers can pass None). It is left in its location for backwards

938

compatibility. If supplied, ''.join(lines) must == line_bytes

939

:param line_bytes: A single string containing the content

940

941

We pass both lines and line_bytes because different routes bring the

942

values to this function. And for memory efficiency, we don't want to

943

have to split/join on-demand.

944

"""

945

# first thing, if the content is something we don't need to store, find

946

# that out.

947

digest = sha_string(line_bytes)

948

if nostore_sha == digest:

949

raise errors.ExistingContent

950

951

present_parents = []

952

if parent_texts is None:

953

parent_texts = {}

954

# Do a single query to ascertain parent presence; we only compress

955

# against parents in the same kvf.

956

present_parent_map = self._index.get_parent_map(parents)

957

for parent in parents:

958

if parent in present_parent_map:

959

present_parents.append(parent)

960

961

# Currently we can only compress against the left most present parent.

962

if (len(present_parents) == 0 or

963

present_parents[0] != parents[0]):

964

delta = False

965

else:

966

# To speed the extract of texts the delta chain is limited

967

# to a fixed number of deltas. This should minimize both

968

# I/O and the time spent applying deltas.

969

delta = self._check_should_delta(present_parents[0])

970

971

text_length = len(line_bytes)

972

options = []

973

no_eol = False

974

# Note: line_bytes is not modified to add a newline, that is tracked

975

# via the no_eol flag. 'lines' *is* modified, because that is the

976

# general values needed by the Content code.

977

if line_bytes and line_bytes[-1] != '\n':

978

options.append('no-eol')

979

no_eol = True

980

# Copy the existing list, or create a new one

981

if lines is None:

982

lines = osutils.split_lines(line_bytes)

983

else:

984

lines = lines[:]

985

# Replace the last line with one that ends in a final newline

986

lines[-1] = lines[-1] + '\n'

987

if lines is None:

988

lines = osutils.split_lines(line_bytes)

989

990

for element in key[:-1]:

991

if type(element) is not str:

992

raise TypeError("key contains non-strings: %r" % (key,))

993

if key[-1] is None:

994

key = key[:-1] + ('sha1:' + digest,)

995

elif type(key[-1]) is not str:

996

raise TypeError("key contains non-strings: %r" % (key,))

997

# Knit hunks are still last-element only

998

version_id = key[-1]

999

content = self._factory.make(lines, version_id)

1000

if no_eol:

1001

# Hint to the content object that its text() call should strip the

1002

# EOL.

1003

content._should_strip_eol = True

1004

if delta or (self._factory.annotated and len(present_parents) > 0):

1005

# Merge annotations from parent texts if needed.

1006

delta_hunks = self._merge_annotations(content, present_parents,

1007

parent_texts, delta, self._factory.annotated,

1008

left_matching_blocks)

1009

1010

if delta:

1011

options.append('line-delta')

1012

store_lines = self._factory.lower_line_delta(delta_hunks)

1013

size, bytes = self._record_to_data(key, digest,

1014

store_lines)

1015

else:

1016

options.append('fulltext')

1017

# isinstance is slower and we have no hierarchy.

1018

if self._factory.__class__ is KnitPlainFactory:

1019

# Use the already joined bytes saving iteration time in

1020

# _record_to_data.

1021

dense_lines = [line_bytes]

1022

if no_eol:

1023

dense_lines.append('\n')

1024

size, bytes = self._record_to_data(key, digest,

1025

lines, dense_lines)

1026

else:

1027

# get mixed annotation + content and feed it into the

1028

# serialiser.

1029

store_lines = self._factory.lower_fulltext(content)

1030

size, bytes = self._record_to_data(key, digest,

1031

store_lines)

1032

1033

access_memo = self._access.add_raw_records([(key, size)], bytes)[0]

1034

self._index.add_records(

1035

((key, options, access_memo, parents),),

1036

random_id=random_id)

1037

return digest, text_length, content

1038

1039

def annotate(self, key):

1040

"""See VersionedFiles.annotate."""

1041

return self._factory.annotate(self, key)

1042

1043

def get_annotator(self):

1044

return _KnitAnnotator(self)

1045

1046

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    With no keys a logical check of this knit is performed; otherwise an
    unordered record stream for keys is returned.
    """
    if keys is not None:
        # At the moment, check does no extra work over get_record_stream
        return self.get_record_stream(keys, 'unordered', True)
    return self._logical_check()

1053

1054

def _logical_check(self):
    """Check that every non-fulltext record has its basis present.

    Fallback versioned files are checked afterwards.

    :raises errors.KnitCorrupt: if the first parent of a non-fulltext key
        is missing from the parent map.
    """
    # This doesn't actually test extraction of everything, but that will
    # impact 'bzr check' substantially, and needs to be integrated with
    # care. However, it does check for the obvious problem of a delta with
    # no basis.
    index = self._index
    keys = index.keys()
    parent_map = self.get_parent_map(keys)
    for key in keys:
        if index.get_method(key) == 'fulltext':
            continue
        # The first parent is taken as the compression basis.
        basis = parent_map[key][0]
        if basis in parent_map:
            continue
        raise errors.KnitCorrupt(self,
            "Missing basis parent %s for %s" % (
            basis, key))
    for fallback in self._immediate_fallback_vfs:
        fallback.check()

1070

1071

def _check_add(self, key, lines, random_id, check_content):

1072

"""check that version_id and lines are safe to add."""

1073

version_id = key[-1]

1074

if version_id is not None:

1075

if contains_whitespace(version_id):

1076

raise InvalidRevisionId(version_id, self)

1077

self.check_not_reserved_id(version_id)

1078

# TODO: If random_id==False and the key is already present, we should

1079

# probably check that the existing content is identical to what is

1080

# being inserted, and otherwise raise an exception. This would make

1081

# the bundle code simpler.

1082

if check_content:

1083

self._check_lines_not_unicode(lines)

1084

self._check_lines_are_lines(lines)

1085

1086

def _check_header(self, key, line):

1087

rec = self._split_header(line)

1088

self._check_header_version(rec, key[-1])

1089

return rec

1090

1091

def _check_header_version(self, rec, version_id):

1092

"""Checks the header version on original format knit records.

1093

1094

These have the last component of the key embedded in the record.

1095

"""

1096

if rec[1] != version_id:

1097

raise KnitCorrupt(self,

1098

'unexpected version, wanted %r, got %r' % (version_id, rec[1]))

1099

1100

def _check_should_delta(self, parent):

329

1101

"""Iterate back through the parent listing, looking for a fulltext.

330

1102

331

1103

This is used when we want to decide whether to add a delta or a new

338

1110

"""

339

1111

delta_size = 0

340

1112

fulltext_size = None

341

delta_parents = first_parents

342

1113

for count in xrange(self._max_delta_chain):

343

parent = delta_parents[0]

344

method = self._index.get_method(parent)

345

pos, size = self._index.get_position(parent)

346

if method == 'fulltext':

1114

try:

1115

# Note that this only looks in the index of this particular

1116

# KnitVersionedFiles, not in the fallbacks. This ensures that

1117

# we won't store a delta spanning physical repository

1118

# boundaries.

1119

build_details = self._index.get_build_details([parent])

1120

parent_details = build_details[parent]

1121

except (RevisionNotPresent, KeyError), e:

1122

# Some basis is not locally present: always fulltext

1123

return False

1124

index_memo, compression_parent, _, _ = parent_details

1125

_, _, size = index_memo

1126

if compression_parent is None:

347

1127

fulltext_size = size

348

1128

break

349

1129

delta_size += size

350

delta_parents = self._index.get_parents(parent)

1130

# We don't explicitly check for presence because this is in an

1131

# inner loop, and if it's missing it'll fail anyhow.

1132

parent = compression_parent

351

1133

else:

352

1134

# We couldn't find a fulltext, so we must create a new one

353

1135

return False

354

1136

# Simple heuristic - if the total I/O would be greater as a delta than

1137

# the originally installed fulltext, we create a new fulltext.

355

1138

return fulltext_size > delta_size

356

1139

357

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

358

"""See VersionedFile._add_delta()."""

359

self._check_add(version_id, []) # should we check the lines ?

360

self._check_versions_present(parents)

361

present_parents = []

362

ghosts = []

363

parent_texts = {}

364

for parent in parents:

365

if not self.has_version(parent):

366

ghosts.append(parent)

367

else:

368

present_parents.append(parent)

369

370

if delta_parent is None:

371

# reconstitute as full text.

372

assert len(delta) == 1 or len(delta) == 0

373

if len(delta):

374

assert delta[0][0] == 0

375

assert delta[0][1] == 0, delta[0][1]

376

return super(KnitVersionedFile, self)._add_delta(version_id,

377

parents,

378

delta_parent,

379

sha1,

380

noeol,

381

delta)

382

383

digest = sha1

384

385

options = []

386

if noeol:

387

options.append('no-eol')

388

389

if delta_parent is not None:

390

# determine the current delta chain length.

391

# To speed the extract of texts the delta chain is limited

392

# to a fixed number of deltas. This should minimize both

393

# I/O and the time spend applying deltas.

394

# The window was changed to a maximum of 200 deltas, but also added

395

# was a check that the total compressed size of the deltas is

396

# smaller than the compressed size of the fulltext.

397

if not self._check_should_delta([delta_parent]):

398

# We don't want a delta here, just do a normal insertion.

399

return super(KnitVersionedFile, self)._add_delta(version_id,

400

parents,

401

delta_parent,

402

sha1,

403

noeol,

404

delta)

405

406

options.append('line-delta')

407

store_lines = self.factory.lower_line_delta(delta)

408

409

where, size = self._data.add_record(version_id, digest, store_lines)

410

self._index.add_version(version_id, options, where, size, parents)

411

412

def _add_raw_records(self, records, data):

413

"""Add all the records 'records' with data pre-joined in 'data'.

414

415

:param records: A list of tuples(version_id, options, parents, size).

416

:param data: The data for the records. When it is written, the records

417

are adjusted to have pos pointing into data by the sum of

418

the preceding records sizes.

419

"""

420

# write all the data

421

pos = self._data.add_raw_record(data)

422

offset = 0

423

index_entries = []

424

for (version_id, options, parents, size) in records:

425

index_entries.append((version_id, options, pos+offset,

426

size, parents))

427

if self._data._do_cache:

428

self._data._cache[version_id] = data[offset:offset+size]

429

offset += size

430

self._index.add_versions(index_entries)

431

432

def enable_cache(self):
    """Start caching data for this knit.

    The request is forwarded to the knit's data object.
    """
    data = self._data
    data.enable_cache()

435

436

def clear_cache(self):
    """Clear the data cache only.

    The request is forwarded to the knit's data object; the index is not
    touched here.
    """
    data = self._data
    data.clear_cache()

439

440

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first copied under a temporary name, then the data file is
    copied, and only then is the index moved into place.

    :param name: Base name of the copy; the index and data suffixes are
        appended to it.
    :param transport: The transport to write the copy to.
    """
    tmp_index_name = name + INDEX_SUFFIX + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    index_file = self.transport.get(self._index._filename)
    try:
        transport.put_file_non_atomic(tmp_index_name, index_file)
    finally:
        # Release the source handle even if the copy fails.
        index_file.close()
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(tmp_index_name, name + INDEX_SUFFIX)

454

455

def create_empty(self, name, transport, mode=None):
    """Create a new knit sharing this knit's factory and delta setting.

    :param name: Name of the new knit.
    :param transport: Transport the new knit is created on.
    :param mode: Accepted but not used by this implementation.
    :return: The newly created KnitVersionedFile.
    """
    empty_knit = KnitVersionedFile(name, transport, factory=self.factory,
                                   delta=self.delta, create=True)
    return empty_knit

458

459

def _fix_parents(self, version, new_parents):

460

"""Fix the parents list for version.

461

462

This is done by appending a new version to the index

463

with identical data except for the parents list.

464

the parents list must be a superset of the current

465

list.

466

"""

467

current_values = self._index._cache[version]

468

assert set(current_values[4]).difference(set(new_parents)) == set()

469

self._index.add_version(version,

470

current_values[1],

471

current_values[2],

472

current_values[3],

473

new_parents)

474

475

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :param version_id: The version to produce a delta for.
    :return: A tuple (parent, sha1, noeol, delta). parent is the first
        parent of version_id, or None when it has no parents.
    :raises RevisionNotPresent: If version_id is not in this knit.
    """
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parent = None
    parent_list = self.get_parents(version_id)
    if len(parent_list):
        parent = parent_list[0]
    pos, size = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, pos, size),))
    data, sha1 = records[version_id]
    version_idx = self._index.lookup(version_id)
    noeol = 'no-eol' in self._index.get_options(version_id)
    if self._index.get_method(version_id) != 'fulltext':
        # Stored as a delta already: just parse it.
        delta = self.factory.parse_line_delta(data, version_idx)
        return parent, sha1, noeol, delta

    # Stored as a fulltext: diff it against the first parent (or against
    # nothing at all when there is no parent).
    new_content = self.factory.parse_fulltext(data, version_idx)
    if parent is None:
        old_texts = []
    else:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

502

503

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the items returned by the index's
        get_graph().
    """
    return dict(self._index.get_graph())

507

508

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    :return: The digest element of version_id's entry in the record map.
    """
    entry = self._get_record_map([version_id])[version_id]
    _method, _content, sha1_digest, _next = entry
    return sha1_digest

513

514

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A list holding the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

518

519

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version that some node lists as a parent but which is
    not itself present in the index cache.
    """
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, node_parents in self._index.get_graph():
        for candidate in node_parents:
            if candidate not in self._index._cache and candidate == version_id:
                return True
    return False

532

533

def versions(self):
    """See VersionedFile.versions.

    :return: Whatever the index's get_versions() reports.
    """
    index = self._index
    return index.get_versions()

536

537

def has_version(self, version_id):
    """See VersionedFile.has_version.

    :return: The index's answer for version_id.
    """
    index = self._index
    return index.has_version(version_id)

540

541

__contains__ = has_version

542

543

def _merge_annotations(self, content, parents, parent_texts={},

544

delta=None, annotated=None):

545

"""Merge annotations for content. This is done by comparing

546

the annotations based on changed to the text.

547

"""

548

if annotated:

549

delta_seq = None

550

for parent_id in parents:

551

merge_content = self._get_content(parent_id, parent_texts)

552

seq = patiencediff.PatienceSequenceMatcher(

553

None, merge_content.text(), content.text())

554

if delta_seq is None:

555

# setup a delta seq to reuse.

556

delta_seq = seq

557

for i, j, n in seq.get_matching_blocks():

558

if n == 0:

559

continue

560

# this appears to copy (origin, text) pairs across to the new

561

# content for any line that matches the last-checked parent.

562

# FIXME: save the sequence control data for delta compression

563

# against the most relevant parent rather than rediffing.

564

content._lines[j:j+n] = merge_content._lines[i:i+n]

565

if delta:

566

if not annotated:

567

reference_content = self._get_content(parents[0], parent_texts)

568

new_texts = content.text()

569

old_texts = reference_content.text()

570

delta_seq = patiencediff.PatienceSequenceMatcher(

571

None, old_texts, new_texts)

572

return self._make_line_delta(delta_seq, content)

573

574

def _make_line_delta(self, delta_seq, new_content):

575

"""Generate a line delta from delta_seq and new_content."""

576

diff_hunks = []

577

for op in delta_seq.get_opcodes():

578

if op[0] == 'equal':

579

continue

580

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

581

return diff_hunks

582

583

def _get_components_positions(self, version_ids):

584

"""Produce a map of position data for the components of versions.

1140

def _build_details_to_components(self, build_details):

1141

"""Convert a build_details tuple to a position tuple."""

1142

# record_details, access_memo, compression_parent

1143

return build_details[3], build_details[0], build_details[1]

1144

1145

def _get_components_positions(self, keys, allow_missing=False):

1146

"""Produce a map of position data for the components of keys.

585

1147

586

1148

This data is intended to be used for retrieving the knit records.

587

1149

588

A dict of version_id to (method, data_pos, data_size, next) is

1150

A dict of key to (record_details, index_memo, next, parents) is

589

1151

returned.

590

1152

method is the way referenced data should be applied.

591

data_pos is the position of the data in the knit.

592

data_size is the size of the data in the knit.

1153

index_memo is the handle to pass to the data access to actually get the

1154

data

593

1155

next is the build-parent of the version, or None for fulltexts.

1156

parents is the version_ids of the parents of this version

1157

1158

:param allow_missing: If True do not raise an error on a missing component,

1159

just ignore it.

594

1160

"""

595

1161

component_data = {}

596

for version_id in version_ids:

597

cursor = version_id

598

599

while cursor is not None and cursor not in component_data:

600

method = self._index.get_method(cursor)

601

if method == 'fulltext':

602

next = None

603

else:

604

next = self.get_parents(cursor)[0]

605

data_pos, data_size = self._index.get_position(cursor)

606

component_data[cursor] = (method, data_pos, data_size, next)

607

cursor = next

1162

pending_components = keys

1163

while pending_components:

1164

build_details = self._index.get_build_details(pending_components)

1165

current_components = set(pending_components)

1166

pending_components = set()

1167

for key, details in build_details.iteritems():

1168

(index_memo, compression_parent, parents,

1169

record_details) = details

1170

method = record_details[0]

1171

if compression_parent is not None:

1172

pending_components.add(compression_parent)

1173

component_data[key] = self._build_details_to_components(details)

1174

missing = current_components.difference(build_details)

1175

if missing and not allow_missing:

1176

raise errors.RevisionNotPresent(missing.pop(), self)

608

1177

return component_data

609

610

def _get_content(self, version_id, parent_texts={}):

1178

1179

def _get_content(self, key, parent_texts={}):

611

1180

"""Returns a content object that makes up the specified

612

1181

version."""

613

if not self.has_version(version_id):

614

raise RevisionNotPresent(version_id, self.filename)

615

616

cached_version = parent_texts.get(version_id, None)

1182

cached_version = parent_texts.get(key, None)

617

1183

if cached_version is not None:

1184

# Ensure the cache dict is valid.

1185

if not self.get_parent_map([key]):

1186

raise RevisionNotPresent(key, self)

618

1187

return cached_version

619

620

text_map, contents_map = self._get_content_maps([version_id])

621

return contents_map[version_id]

622

623

def _check_versions_present(self, version_ids):

624

"""Check that all specified versions are present."""

625

version_ids = set(version_ids)

626

for r in list(version_ids):

627

if self._index.has_version(r):

628

version_ids.remove(r)

629

if version_ids:

630

raise RevisionNotPresent(list(version_ids)[0], self.filename)

631

632

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

633

"""See VersionedFile.add_lines_with_ghosts()."""

634

self._check_add(version_id, lines)

635

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

636

637

def _add_lines(self, version_id, parents, lines, parent_texts):

638

"""See VersionedFile.add_lines."""

639

self._check_add(version_id, lines)

640

self._check_versions_present(parents)

641

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

642

643

def _check_add(self, version_id, lines):

644

"""check that version_id and lines are safe to add."""

645

assert self.writable, "knit is not opened for write"

646

### FIXME escape. RBC 20060228

647

if contains_whitespace(version_id):

648

raise InvalidRevisionId(version_id, self.filename)

649

if self.has_version(version_id):

650

raise RevisionAlreadyPresent(version_id, self.filename)

651

self._check_lines_not_unicode(lines)

652

self._check_lines_are_lines(lines)

653

654

def _add(self, version_id, lines, parents, delta, parent_texts):

655

"""Add a set of lines on top of version specified by parents.

656

657

If delta is true, compress the text as a line-delta against

658

the first parent.

659

660

Any versions not present will be converted into ghosts.

661

"""

662

# 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)

663

# +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)

664

# +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)

665

# +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)

666

# +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)

667

# +461 0 36.3420 15.4540 +bzrlib.knit:146(make)

668

# +1383 0 8.0370 8.0370 +<len>

669

# +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)

670

# +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)

671

# +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)

672

# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)

673

674

present_parents = []

675

ghosts = []

676

if parent_texts is None:

677

parent_texts = {}

678

for parent in parents:

679

if not self.has_version(parent):

680

ghosts.append(parent)

681

else:

682

present_parents.append(parent)

683

684

if delta and not len(present_parents):

685

delta = False

686

687

digest = sha_strings(lines)

688

options = []

689

if lines:

690

if lines[-1][-1] != '\n':

691

options.append('no-eol')

692

lines[-1] = lines[-1] + '\n'

693

694

if len(present_parents) and delta:

695

# To speed the extract of texts the delta chain is limited

696

# to a fixed number of deltas. This should minimize both

697

# I/O and the time spend applying deltas.

698

delta = self._check_should_delta(present_parents)

699

700

lines = self.factory.make(lines, version_id)

701

if delta or (self.factory.annotated and len(present_parents) > 0):

702

# Merge annotations from parent texts if so is needed.

703

delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,

704

delta, self.factory.annotated)

705

706

if delta:

707

options.append('line-delta')

708

store_lines = self.factory.lower_line_delta(delta_hunks)

709

else:

710

options.append('fulltext')

711

store_lines = self.factory.lower_fulltext(lines)

712

713

where, size = self._data.add_record(version_id, digest, store_lines)

714

self._index.add_version(version_id, options, where, size, parents)

715

return lines

716

717

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks; progress_bar is not used.
    """
    return None

719

720

def _clone_text(self, new_version_id, old_version_id, parents):

721

"""See VersionedFile.clone_text()."""

722

# FIXME RBC 20060228 make fast by only inserting an index with null

723

# delta.

724

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

725

726

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    :return: The first entry of get_line_list() for this single version.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

729

730

def _get_record_map(self, version_ids):

1188

generator = _VFContentMapGenerator(self, [key])

1189

return generator._get_content(key)

1190

1191

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    The local index is searched first; each transitive fallback is then
    asked only for the keys still missing, until none are left.

    :param keys: The keys whose ancestry is wanted.
    :return: A KnownGraph built from the combined parent map.
    """
    parent_map, still_missing = self._index.find_ancestry(keys)
    for fallback in self._transitive_fallbacks():
        if not still_missing:
            break
        found, not_found = fallback._index.find_ancestry(still_missing)
        parent_map.update(found)
        still_missing = not_found
    return _mod_graph.KnownGraph(parent_map)

1203

1204

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the merged mapping is wanted; the per-source breakdown that
    # comes with it is dropped.
    with_sources = self._get_parent_map_with_sources(keys)
    return with_sources[0]

1212

1213

def _get_parent_map_with_sources(self, keys):

1214

"""Get a map of the parents of keys.

1215

1216

:param keys: The keys to look up parents for.

1217

:return: A tuple. The first element is a mapping from keys to parents.

1218

Absent keys are absent from the mapping. The second element is a

1219

list with the locations each key was found in. The first element

1220

is the in-this-knit parents, the second the first fallback source,

1221

and so on.

1222

"""

1223

result = {}

1224

sources = [self._index] + self._immediate_fallback_vfs

1225

source_results = []

1226

missing = set(keys)

1227

for source in sources:

1228

if not missing:

1229

break

1230

new_result = source.get_parent_map(missing)

1231

source_results.append(new_result)

1232

result.update(new_result)

1233

missing.difference_update(set(new_result))

1234

return result, source_results

1235

1236

def _get_record_map(self, keys, allow_missing=False):

731

1237

"""Produce a dictionary of knit records.

732

733

The keys are version_ids, the values are tuples of (method, content,

734

digest, next).

735

method is the way the content should be applied.

736

content is a KnitContent object.

737

digest is the SHA1 digest of this version id after all steps are done

738

next is the build-parent of the version, i.e. the leftmost ancestor.

739

If the method is fulltext, next will be None.

740

"""

741

position_map = self._get_components_positions(version_ids)

742

# c = component_id, m = method, p = position, s = size, n = next

743

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

744

record_map = {}

745

for component_id, content, digest in \

746

self._data.read_records_iter(records):

747

method, position, size, next = position_map[component_id]

748

record_map[component_id] = method, content, digest, next

749

750

return record_map

751

752

def get_text(self, version_id):

753

"""See VersionedFile.get_text"""

754

return self.get_texts([version_id])[0]

755

756

def get_texts(self, version_ids):

757

return [''.join(l) for l in self.get_line_list(version_ids)]

758

759

def get_line_list(self, version_ids):

760

"""Return the texts of listed versions as a list of strings."""

761

text_map, content_map = self._get_content_maps(version_ids)

762

return [text_map[v] for v in version_ids]

763

764

def _get_content_maps(self, version_ids):

765

"""Produce maps of text and KnitContents

766

1238

1239

:return: {key:(record, record_details, digest, next)}

1240

record

1241

data returned from read_records (a KnitContent object)

1242

record_details

1243

opaque information to pass to parse_record

1244

digest

1245

SHA1 digest of the full text after all steps are done

1246

1247

build-parent of the version, i.e. the leftmost ancestor.

1248

Will be None if the record is not a delta.

1249

:param keys: The keys to build a map for

1250

:param allow_missing: If some records are missing, rather than

1251

error, just return the data that could be generated.

1252

"""

1253

raw_map = self._get_record_map_unparsed(keys,

1254

allow_missing=allow_missing)

1255

return self._raw_map_to_record_map(raw_map)

1256

1257

def _raw_map_to_record_map(self, raw_map):

1258

"""Parse the contents of _get_record_map_unparsed.

1259

1260

:return: see _get_record_map.

1261

"""

1262

result = {}

1263

for key in raw_map:

1264

data, record_details, next = raw_map[key]

1265

content, digest = self._parse_record(key[-1], data)

1266

result[key] = content, record_details, digest, next

1267

return result

1268

1269

def _get_record_map_unparsed(self, keys, allow_missing=False):

1270

"""Get the raw data for reconstructing keys without parsing it.

1271

1272

:return: A dict suitable for parsing via _raw_map_to_record_map.

1273

key-> raw_bytes, (method, noeol), compression_parent

1274

"""

1275

# This retries the whole request if anything fails. Potentially we

1276

# could be a bit more selective. We could track the keys whose records

1277

# we have successfully found, and then only request the new records

1278

# from there. However, _get_components_positions grabs the whole build

1279

# chain, which means we'll likely try to grab the same records again

1280

# anyway. Also, can the build chains change as part of a pack

1281

# operation? We wouldn't want to end up with a broken chain.

1282

while True:

1283

try:

1284

position_map = self._get_components_positions(keys,

1285

allow_missing=allow_missing)

1286

# key = component_id, r = record_details, i_m = index_memo,

1287

# n = next

1288

records = [(key, i_m) for key, (r, i_m, n)

1289

in position_map.iteritems()]

1290

# Sort by the index memo, so that we request records from the

1291

# same pack file together, and in forward-sorted order

1292

records.sort(key=operator.itemgetter(1))

1293

raw_record_map = {}

1294

for key, data in self._read_records_iter_unchecked(records):

1295

(record_details, index_memo, next) = position_map[key]

1296

raw_record_map[key] = data, record_details, next

1297

return raw_record_map

1298

except errors.RetryWithNewPacks, e:

1299

self._access.reload_or_raise(e)

1300

1301

@classmethod

1302

def _split_by_prefix(cls, keys):

1303

"""For the given keys, split them up based on their prefix.

1304

1305

To keep memory pressure somewhat under control, split the

1306

requests back into per-file-id requests, otherwise "bzr co"

1307

extracts the full tree into memory before writing it to disk.

1308

This should be revisited if _get_content_maps() can ever cross

1309

file-id boundaries.

1310

1311

The keys for a given file_id are kept in the same relative order.

1312

Ordering between file_ids is not, though prefix_order will return the

1313

order that the key was first seen.

1314

1315

:param keys: An iterable of key tuples

1316

:return: (split_map, prefix_order)

1317

split_map A dictionary mapping prefix => keys

1318

prefix_order The order that we saw the various prefixes

1319

"""

1320

split_by_prefix = {}

1321

prefix_order = []

1322

for key in keys:

1323

if len(key) == 1:

1324

prefix = ''

1325

else:

1326

prefix = key[0]

1327

1328

if prefix in split_by_prefix:

1329

split_by_prefix[prefix].append(key)

1330

else:

1331

split_by_prefix[prefix] = [key]

1332

prefix_order.append(prefix)

1333

return split_by_prefix, prefix_order

1334

1335

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    :param keys: The keys to group; split by prefix, in first-seen order.
    :param non_local_keys: Keys that are carried along with the group
        holding their prefix.
    :param positions: Passed to the index to size each prefix's keys.
    :param _min_buffer_size: A group is closed once its accumulated build
        size exceeds this value.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together. Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys,
                                                      positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            # This group is big enough; close it and start a new one.
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    if cur_keys:
        # Whatever is left over forms a final, smaller group.
        result.append((cur_keys, cur_non_local))
    return result

1377

1378

def get_record_stream(self, keys, ordering, include_delta_closure):

1379

"""Get a stream of records for keys.

1380

1381

:param keys: The keys to include.

1382

:param ordering: Either 'unordered' or 'topological'. A topologically

1383

sorted stream has compression parents strictly before their

1384

children.

1385

:param include_delta_closure: If True then the closure across any

1386

compression parents will be included (in the opaque data).

1387

:return: An iterator of ContentFactory objects, each of which is only

1388

valid until the iterator is advanced.

1389

"""

1390

# keys might be a generator

1391

keys = set(keys)

1392

if not keys:

1393

return

1394

if not self._index.has_graph:

1395

# Cannot sort when no graph has been stored.

1396

ordering = 'unordered'

1397

1398

remaining_keys = keys

1399

while True:

1400

try:

1401

keys = set(remaining_keys)

1402

for content_factory in self._get_remaining_record_stream(keys,

1403

ordering, include_delta_closure):

1404

remaining_keys.discard(content_factory.key)

1405

yield content_factory

1406

return

1407

except errors.RetryWithNewPacks, e:

1408

self._access.reload_or_raise(e)

1409

1410

def _get_remaining_record_stream(self, keys, ordering,
                                 include_delta_closure):
    """This function is the 'retry' portion for get_record_stream.

    :param keys: A set of the keys still to be yielded.
    :param ordering: 'unordered', 'topological' or 'groupcompress'.
    :param include_delta_closure: If True, build chains are followed and
        records come from a _VFContentMapGenerator.
    :return: A generator. Keys found in no source are yielded first as
        AbsentContentFactory objects, followed by the present records.
    """
    if include_delta_closure:
        positions = self._get_components_positions(keys, allow_missing=True)
    else:
        build_details = self._index.get_build_details(keys)
        # map from key to
        # (record_details, access_memo, compression_parent_key)
        positions = dict((key, self._build_details_to_components(details))
                         for key, details in build_details.iteritems())
    absent_keys = keys.difference(set(positions))
    # There may be more absent keys : if we're missing the basis component
    # and are trying to include the delta closure.
    # XXX: We should not ever need to examine remote sources because we do
    # not permit deltas across versioned files boundaries.
    if include_delta_closure:
        needed_from_fallback = set()
        # Build up reconstructable_keys dict.  key:True in this dict means
        # the key can be reconstructed.
        reconstructable_keys = {}
        for key in keys:
            # the delta chain
            try:
                chain = [key, positions[key][2]]
            except KeyError:
                needed_from_fallback.add(key)
                continue
            reconstructable = True
            while chain[-1] is not None:
                basis = chain[-1]
                if basis in reconstructable_keys:
                    reconstructable = reconstructable_keys[basis]
                    break
                try:
                    chain.append(positions[basis][2])
                except KeyError:
                    # missing basis component
                    needed_from_fallback.add(basis)
                    reconstructable = True
                    break
            for chain_key in chain[:-1]:
                reconstructable_keys[chain_key] = reconstructable
            if not reconstructable:
                needed_from_fallback.add(key)
    # Double index lookups here : need a unified api ?
    global_map, parent_maps = self._get_parent_map_with_sources(keys)
    if ordering in ('topological', 'groupcompress'):
        if ordering == 'topological':
            # Global topological sort
            present_keys = tsort.topo_sort(global_map)
        else:
            present_keys = sort_groupcompress(global_map)
        # Now group by source:
        source_keys = []
        current_source = None
        for key in present_keys:
            for parent_map in parent_maps:
                if key in parent_map:
                    key_source = parent_map
                    break
            if current_source is not key_source:
                source_keys.append((key_source, []))
                current_source = key_source
            source_keys[-1][1].append(key)
    else:
        if ordering != 'unordered':
            raise AssertionError('valid values for ordering are:'
                ' "unordered", "groupcompress" or "topological" not: %r'
                % (ordering,))
        # Just group by source; remote sources first.
        present_keys = []
        source_keys = []
        for parent_map in reversed(parent_maps):
            group = []
            source_keys.append((parent_map, group))
            for key in parent_map:
                present_keys.append(key)
                group.append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
    absent_keys = keys - set(global_map)
    for key in absent_keys:
        yield AbsentContentFactory(key)
    # restrict our view to the keys we can answer.
    # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
    # XXX: At that point we need to consider the impact of double reads by
    # utilising components multiple times.
    if include_delta_closure:
        # XXX: get_content_maps performs its own index queries; allow state
        # to be passed in.
        non_local_keys = needed_from_fallback - absent_keys
        for batch_keys, batch_non_local in self._group_keys_for_io(
                present_keys, non_local_keys, positions):
            generator = _VFContentMapGenerator(self, batch_keys,
                                               batch_non_local,
                                               global_map,
                                               ordering=ordering)
            for record in generator.get_record_stream():
                yield record
    else:
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # this KnitVersionedFiles
                records = [(key, positions[key][1]) for key in sub_keys]
                for key, raw_data in self._read_records_iter_unchecked(
                        records):
                    (record_details, index_memo, _) = positions[key]
                    yield KnitContentFactory(key, global_map[key],
                        record_details, None, raw_data,
                        self._factory.annotated, None)
            else:
                # parent_maps[0] is this knit, so fallback N sits at
                # index N - 1 of the fallback list.
                vf = self._immediate_fallback_vfs[
                    parent_maps.index(source) - 1]
                for record in vf.get_record_stream(sub_keys, ordering,
                                                   include_delta_closure):
                    yield record

1528

1529

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    Keys this knit cannot answer are passed to each immediate fallback in
    turn until none remain.

    :param keys: The keys to look up digests for.
    :return: A dict mapping each key that was found to its digest.
    """
    unresolved = set(keys)
    record_map = self._get_record_map(unresolved, allow_missing=True)
    digests = {}
    for key, details in record_map.iteritems():
        # The record map may hold keys that were not asked for; only
        # requested keys are reported.
        if key in unresolved:
            # record entry 2 is the 'digest'.
            digests[key] = details[2]
    unresolved.difference_update(set(digests))
    for fallback in self._immediate_fallback_vfs:
        if not unresolved:
            break
        from_fallback = fallback.get_sha1s(unresolved)
        digests.update(from_fallback)
        unresolved.difference_update(set(from_fallback))
    return digests

1547

1548

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    Records already in one of this knit's gzipped knit kinds (or in a kind
    that an adapter can convert to one) are copied across as raw bytes.
    'chunked' records and anything else are expanded and added through
    add_lines(). Index entries for deltas whose compression parent is not
    yet in this index are held back until the parent shows up; whatever is
    still held back at the end of the stream is handed to the index with
    missing_compression_parents=True.

    :param stream: A stream of records to insert.
    :return: None
    :raises RevisionNotPresent: if the stream contains an 'absent' record.
    :seealso VersionedFiles.get_record_stream:
    """
    adapters = {}

    def get_adapter(adapter_key):
        # Adapters are built on first use and cached for the rest of this
        # call. A KeyError from the registry lookup propagates; the raw
        # insertion path below catches it to try another target kind.
        if adapter_key not in adapters:
            adapters[adapter_key] = adapter_registry.get(adapter_key)(self)
        return adapters[adapter_key]

    is_annotated = self._factory.annotated
    deltas_allowed = self._max_delta_chain
    delta_types = set()
    if is_annotated:
        # self is annotated, we need annotated knits to use directly.
        flavour = "annotated-"
        convertibles = set()
    else:
        # self is not annotated, but we can strip annotations cheaply.
        flavour = ""
        convertibles = set(["knit-annotated-ft-gz"])
        if deltas_allowed:
            delta_types.add("knit-annotated-delta-gz")
            convertibles.add("knit-annotated-delta-gz")
    # The set of types we can cheaply adapt without needing basis texts.
    native_types = set(["knit-%sft-gz" % flavour])
    if deltas_allowed:
        own_delta_kind = "knit-%sdelta-gz" % flavour
        native_types.add(own_delta_kind)
        delta_types.add(own_delta_kind)
    knit_types = native_types.union(convertibles)

    # Buffer all index entries that we can't add immediately because their
    # basis parent is missing. We don't buffer everything because
    # generating annotations may require access to some of the new
    # records. However we can't generate annotations from new deltas until
    # their basis parent is present anyway, so we get away with not needing
    # an index that includes the new keys.
    #
    # See <http://launchpad.net/bugs/300177> about ordering of compression
    # parents in the records - to be conservative, we insist that all
    # parents must be present to avoid expanding to a fulltext.
    #
    # key = basis_parent, value = list of index entries waiting for it
    buffered_index_entries = {}
    for record in stream:
        kind = record.storage_kind
        if kind.startswith('knit-') and kind.endswith('-gz'):
            # Check that the ID in the header of the raw knit bytes matches
            # the record metadata.
            header_stream, _ = self._parse_record_header(
                record.key, record._raw_record)
            header_stream.close()
        buffered = False
        parents = record.parents
        # TODO: eventually the record itself should track
        #       compression_parent
        compression_parent = None
        if record.storage_kind in delta_types:
            compression_parent = parents[0]
        if record.storage_kind == 'absent':
            # Raise an error when a record is missing.
            raise RevisionNotPresent([record.key], self)
        elif ((record.storage_kind in knit_types)
              and (compression_parent is None
                   or not self._immediate_fallback_vfs
                   or self._index.has_key(compression_parent)
                   or not self.has_key(compression_parent))):
            # we can insert the knit record literally if either it has no
            # compression parent OR we already have its basis in this kvf
            # OR the basis is not present even in the fallbacks.  In the
            # last case it will either turn up later in the stream and all
            # will be well, or it won't turn up at all and we'll raise an
            # error at the end.
            #
            # TODO: self.has_key is somewhat redundant with
            # self._index.has_key; we really want something that directly
            # asks if it's only present in the fallbacks. -- mbp 20081119
            if record.storage_kind in native_types:
                # It's a knit record, it has a _raw_record field (even if
                # it was reconstituted from a network stream).
                raw_bytes = record._raw_record
            else:
                try:
                    adapter = get_adapter(
                        (record.storage_kind, "knit-delta-gz"))
                except KeyError:
                    adapter = get_adapter(
                        (record.storage_kind, "knit-ft-gz"))
                raw_bytes = adapter.get_bytes(record)
            build_details = record._build_details
            options = [build_details[0]]
            if build_details[1]:
                options.append('no-eol')
            # Just blat it across.
            # Note: This does end up adding data on duplicate keys. As
            # modern repositories use atomic insertions this should not
            # lead to excessive growth in the event of interrupted fetches.
            # 'knit' repositories may suffer excessive growth, but as a
            # deprecated format this is tolerable. It can be fixed if
            # needed by having the kndx index support raise on a duplicate
            # add with identical parents and options.
            access_memo = self._access.add_raw_records(
                [(record.key, len(raw_bytes))], raw_bytes)[0]
            index_entry = (record.key, options, access_memo, parents)
            # Not a fulltext means the compression parent must also be
            # present, physically in this KnitVersionedFiles and not in a
            # fallback.
            # Note that pack backed knits don't need to buffer here
            # because they buffer all writes to the transaction level,
            # but we don't expose that difference at the index level. If
            # the query here has sufficient cost to show up in
            # profiling we should do that.
            needs_basis = 'fulltext' not in options
            if needs_basis and not self._index.has_key(compression_parent):
                waiting = buffered_index_entries.setdefault(
                    compression_parent, [])
                waiting.append(index_entry)
                buffered = True
            else:
                self._index.add_records([index_entry])
        elif record.storage_kind == 'chunked':
            self.add_lines(record.key, parents,
                osutils.chunks_to_lines(record.get_bytes_as('chunked')))
        else:
            # Not suitable for direct insertion as a
            # delta, either because it's not the right format, or this
            # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
            # 0) or because it depends on a base only present in the
            # fallback kvfs.
            self._access.flush()
            try:
                # Try getting a fulltext directly from the record.
                fulltext = record.get_bytes_as('fulltext')
            except errors.UnavailableRepresentation:
                adapter = get_adapter((record.storage_kind, 'fulltext'))
                fulltext = adapter.get_bytes(record)
            try:
                self.add_lines(record.key, parents, split_lines(fulltext))
            except errors.RevisionAlreadyPresent:
                pass
        # Add any records whose basis parent is now available.
        if not buffered:
            ready = [record.key]
            while ready:
                basis = ready.pop(0)
                if basis not in buffered_index_entries:
                    continue
                unblocked = buffered_index_entries[basis]
                self._index.add_records(unblocked)
                # Each entry just added may itself be the basis of
                # further buffered entries.
                ready.extend([entry[0] for entry in unblocked])
                del buffered_index_entries[basis]
    if buffered_index_entries:
        # There were index entries buffered at the end of the stream,
        # so these need to be added (if the index supports holding such
        # entries for later insertion).
        leftovers = [entry
                     for basis in buffered_index_entries
                     for entry in buffered_index_entries[basis]]
        self._index.add_records(
            leftovers, missing_compression_parents=True)

1717

1718

def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Check this after calling insert_record_stream to find out if there are
    any missing compression parents.  If there are, the records that
    depend on them are not able to be inserted safely. For atomic
    KnitVersionedFiles built on packs, the transaction should be aborted or
    suspended - commit will fail at this point. Nonatomic knits will error
    earlier because they have no staging area to put pending entries into.

    :return: Whatever the index reports from
        get_missing_compression_parents().
    """
    index = self._index
    return index.get_missing_compression_parents()

1729

1730

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.
     * If a requested key did not change any lines (or didn't have any
       lines), it may not be mentioned at all in the result.

    :param keys: The keys whose records should be walked.
    :param pb: Progress bar supplied by caller.
    :return: An iterator over (line, key).
    :raises RevisionNotPresent: if some keys are not found locally and
        there are no fallback versioned files to consult.
    """
    if pb is None:
        pb = ui.ui_factory.nested_progress_bar()
    keys = set(keys)
    total = len(keys)
    # Keep reading until one pass completes without the packs changing
    # underneath us. Keys already yielded have been removed from 'keys',
    # so a retry only re-reads what is still outstanding.
    while True:
        try:
            # we don't care about inclusions, the caller cares.
            # but we need to setup a list of records to visit.
            # we need key, position, length
            build_details = self._index.get_build_details(keys)
            key_records = [(key, build_details[key][0])
                           for key in build_details if key in keys]
            for key_idx, (key, data, sha_value) in enumerate(
                    self._read_records_iter(key_records)):
                pb.update('Walking content', key_idx, total)
                if build_details[key][1] is None:
                    # No compression parent: fulltext
                    line_iterator = self._factory.get_fulltext_content(data)
                else:
                    # Delta
                    line_iterator = self._factory.get_linedelta_content(data)
                # Now that we are yielding the data for this key, remove it
                # from the list
                keys.remove(key)
                # XXX: It might be more efficient to yield (key,
                # line_iterator) in the future. However for now, this is a
                # simpler change to integrate into the rest of the
                # codebase. RBC 20071110
                for line in line_iterator:
                    yield line, key
        except errors.RetryWithNewPacks as e:
            self._access.reload_or_raise(e)
        else:
            break
    # If there are still keys we've not yet found, we look in the fallback
    # vfs, and hope to find them there.  Note that if the keys are found
    # but had no changes or no content, the fallback may not return
    # anything.
    if keys and not self._immediate_fallback_vfs:
        # XXX: strictly the second parameter is meant to be the file id
        # but it's not easily accessible here.
        raise RevisionNotPresent(keys, repr(self))
    for fallback in self._immediate_fallback_vfs:
        if not keys:
            break
        seen_in_fallback = set()
        for line, key in fallback.iter_lines_added_or_present_in_keys(keys):
            seen_in_fallback.add(key)
            yield line, key
        keys.difference_update(seen_in_fallback)
    pb.update('Walking content', total, total)

1808

1809

def _make_line_delta(self, delta_seq, new_content):

1810

"""Generate a line delta from delta_seq and new_content."""

1811

diff_hunks = []

1812

for op in delta_seq.get_opcodes():

1813

if op[0] == 'equal':

1814

continue

1815

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

1816

return diff_hunks

1817

1818

def _merge_annotations(self, content, parents, parent_texts={},

1819

delta=None, annotated=None,

1820

left_matching_blocks=None):

1821

"""Merge annotations for content and generate deltas.

1822

1823

This is done by comparing the annotations based on changes to the text

1824

and generating a delta on the resulting full texts. If annotations are

1825

not being created then a simple delta is created.

1826

"""

1827

if left_matching_blocks is not None:

1828

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

1829

else:

1830

delta_seq = None

1831

if annotated:

1832

for parent_key in parents:

1833

merge_content = self._get_content(parent_key, parent_texts)

1834

if (parent_key == parents[0] and delta_seq is not None):

1835

seq = delta_seq

1836

else:

1837

seq = patiencediff.PatienceSequenceMatcher(

1838

None, merge_content.text(), content.text())

1839

for i, j, n in seq.get_matching_blocks():

1840

if n == 0:

1841

continue

1842

# this copies (origin, text) pairs across to the new

1843

# content for any line that matches the last-checked

1844

# parent.

1845

content._lines[j:j+n] = merge_content._lines[i:i+n]

1846

# XXX: Robert says the following block is a workaround for a

1847

# now-fixed bug and it can probably be deleted. -- mbp 20080618

1848

if content._lines and content._lines[-1][1][-1] != '\n':

1849

# The copied annotation was from a line without a trailing EOL,

1850

# reinstate one for the content object, to ensure correct

1851

# serialization.

1852

line = content._lines[-1][1] + '\n'

1853

content._lines[-1] = (content._lines[-1][0], line)

1854

if delta:

1855

if delta_seq is None:

1856

reference_content = self._get_content(parents[0], parent_texts)

1857

new_texts = content.text()

1858

old_texts = reference_content.text()

1859

delta_seq = patiencediff.PatienceSequenceMatcher(

1860

None, old_texts, new_texts)

1861

return self._make_line_delta(delta_seq, content)

1862

1863

def _parse_record(self, version_id, data):

1864

"""Parse an original format knit record.

1865

1866

These have the last element of the key only present in the stored data.

1867

"""

1868

rec, record_contents = self._parse_record_unchecked(data)

1869

self._check_header_version(rec, version_id)

1870

return record_contents, rec[3]

1871

1872

def _parse_record_header(self, key, raw_data):

1873

"""Parse a record header for consistency.

1874

1875

:return: the header and the decompressor stream.

1876

as (stream, header_record)

1877

"""

1878

df = gzip.GzipFile(mode='rb', fileobj=StringIO(raw_data))

1879

try:

1880

# Current serialise

1881

rec = self._check_header(key, df.readline())

1882

except Exception, e:

1883

raise KnitCorrupt(self,

1884

"While reading {%s} got %s(%s)"

1885

% (key, e.__class__.__name__, str(e)))

1886

return df, rec

1887

1888

def _parse_record_unchecked(self, data):
    """Decompress a raw knit record and split it into header and content.

    The record is expected to consist of a header line, the content lines
    and a final 'end <version>' line. The header is split with
    _split_header(), but the version it names is not compared against any
    expected key here.

    :param data: The gzip-compressed record bytes.
    :return: (rec, record_contents) where rec is the split header and
        record_contents the list of content lines between the header and
        the end line.
    :raises KnitCorrupt: if the data cannot be decompressed, holds fewer
        than the two mandatory lines, has a line count differing from the
        one announced in the header, or ends with the wrong end line.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df = gzip.GzipFile(mode='rb', fileobj=StringIO(data))
    try:
        try:
            record_contents = df.readlines()
        except Exception as e:
            raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %
                (data, e.__class__.__name__, str(e)))
    finally:
        # Everything has been read (or reading failed); release the
        # decompressor on every path, not just the successful one.
        df.close()
    if len(record_contents) < 2:
        # Without this guard the pops below would leak a bare IndexError
        # for a truncated record instead of reporting corruption.
        raise KnitCorrupt(self,
            'truncated record: expected a header and an end line, got %r'
            % (record_contents,))
    header = record_contents.pop(0)
    rec = self._split_header(header)
    last_line = record_contents.pop()
    if len(record_contents) != int(rec[2]):
        raise KnitCorrupt(self,
                          'incorrect number of lines %s != %s'
                          ' for version {%s} %s'
                          % (len(record_contents), int(rec[2]),
                             rec[1], record_contents))
    if last_line != 'end %s\n' % rec[1]:
        raise KnitCorrupt(self,
                          'unexpected version end line %r, wanted %r'
                          % (last_line, rec[1]))
    return rec, record_contents

1914

1915

def _read_records_iter(self, records):

1916

"""Read text records from data file and yield result.

1917

1918

The result will be returned in whatever is the fastest to read.

1919

Not by the order requested. Also, multiple requests for the same

1920

record will only yield 1 response.

1921

:param records: A list of (key, access_memo) entries

1922

:return: Yields (key, contents, digest) in the order

1923

read, not the order requested

1924

"""

1925

if not records:

1926

return

1927

1928

# XXX: This smells wrong, IO may not be getting ordered right.

1929

needed_records = sorted(set(records), key=operator.itemgetter(1))

1930

if not needed_records:

1931

return

1932

1933

# The transport optimizes the fetching as well

1934

# (ie, reads continuous ranges.)

1935

raw_data = self._access.get_raw_records(

1936

[index_memo for key, index_memo in needed_records])

1937

1938

for (key, index_memo), data in \

1939

izip(iter(needed_records), raw_data):

1940

content, digest = self._parse_record(key[-1], data)

1941

yield key, content, digest

1942

1943

def _read_records_iter_raw(self, records):

1944

"""Read text records from data file and yield raw data.

1945

1946

This unpacks enough of the text record to validate the id is

1947

as expected but thats all.

1948

1949

Each item the iterator yields is (key, bytes,

1950

expected_sha1_of_full_text).

1951

"""

1952

for key, data in self._read_records_iter_unchecked(records):

1953

# validate the header (note that we can only use the suffix in

1954

# current knit records).

1955

df, rec = self._parse_record_header(key, data)

1956

df.close()

1957

yield key, data, rec[3]

1958

1959

def _read_records_iter_unchecked(self, records):

1960

"""Read text records from data file and yield raw data.

1961

1962

No validation is done.

1963

1964

Yields tuples of (key, data).

1965

"""

1966

# setup an iterator of the external records:

1967

# uses readv so nice and fast we hope.

1968

if len(records):

1969

# grab the disk data needed.

1970

needed_offsets = [index_memo for key, index_memo

1971

in records]

1972

raw_records = self._access.get_raw_records(needed_offsets)

1973

1974

for key, index_memo in records:

1975

data = raw_records.next()

1976

yield key, data

1977

1978

def _record_to_data(self, key, digest, lines, dense_lines=None):

1979

"""Convert key, digest, lines into a raw data block.

1980

1981

:param key: The key of the record. Currently keys are always serialised

1982

using just the trailing component.

1983

:param dense_lines: The bytes of lines but in a denser form. For

1984

instance, if lines is a list of 1000 bytestrings each ending in \n,

1985

dense_lines may be a list with one line in it, containing all the

1986

1000's lines and their \n's. Using dense_lines if it is already

1987

known is a win because the string join to create bytes in this

1988

function spends less time resizing the final string.

1989

:return: (len, a StringIO instance with the raw data ready to read.)

1990

"""

1991

chunks = ["version %s %d %s\n" % (key[-1], len(lines), digest)]

1992

chunks.extend(dense_lines or lines)

1993

chunks.append("end %s\n" % key[-1])

1994

for chunk in chunks:

1995

if type(chunk) is not str:

1996

raise AssertionError(

1997

'data must be plain bytes was %s' % type(chunk))

1998

if lines and lines[-1][-1] != '\n':

1999

raise ValueError('corrupt lines value %r' % lines)

2000

compressed_bytes = tuned_gzip.chunks_to_gzip(chunks)

2001

return len(compressed_bytes), compressed_bytes

2002

2003

def _split_header(self, line):

2004

rec = line.split()

2005

if len(rec) != 4:

2006

raise KnitCorrupt(self,

2007

'unexpected number of elements in record header')

2008

return rec

2009

2010

def keys(self):
    """See VersionedFiles.keys.

    :return: The set of all keys reported by this object's index and by
        each of its immediate fallback versioned files.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    sources = [self._index]
    sources.extend(self._immediate_fallback_vfs)
    all_keys = set()
    for source in sources:
        all_keys.update(source.keys())
    return all_keys

2019

2020

2021

class _ContentMapGenerator(object):

2022

"""Generate texts or expose raw deltas for a set of texts."""

2023

2024

def __init__(self, ordering='unordered'):

2025

self._ordering = ordering

2026

2027

def _get_content(self, key):

2028

"""Get the content object for key."""

2029

# Note that _get_content is only called when the _ContentMapGenerator

2030

# has been constructed with just one key requested for reconstruction.

2031

if key in self.nonlocal_keys:

2032

record = self.get_record_stream().next()

2033

# Create a content object on the fly

2034

lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

2035

return PlainKnitContent(lines, record.key)

2036

else:

2037

# local keys we can ask for directly

2038

return self._get_one_work(key)

2039

2040

def get_record_stream(self):
    """Get a record stream for the keys requested during __init__.

    :return: A generator re-yielding every record produced by _work().
    """
    for produced in self._work():
        yield produced

2044

2045

def _work(self):

2046

"""Produce maps of text and KnitContents as dicts.

2047

767

2048

:return: (text_map, content_map) where text_map contains the texts for

768

the requested versions and content_map contains the KnitContents.

769

Both dicts take version_ids as their keys.

2049

the requested versions and content_map contains the KnitContents.

770

2050

"""

771

for version_id in version_ids:

772

if not self.has_version(version_id):

773

raise RevisionNotPresent(version_id, self.filename)

774

record_map = self._get_record_map(version_ids)

775

776

text_map = {}

777

content_map = {}

778

final_content = {}

779

for version_id in version_ids:

2051

# NB: By definition we never need to read remote sources unless texts

2052

# are requested from them: we don't delta across stores - and we

2053

# explicitly do not want to to prevent data loss situations.

2054

if self.global_map is None:

2055

self.global_map = self.vf.get_parent_map(self.keys)

2056

nonlocal_keys = self.nonlocal_keys

2057

2058

missing_keys = set(nonlocal_keys)

2059

# Read from remote versioned file instances and provide to our caller.

2060

for source in self.vf._immediate_fallback_vfs:

2061

if not missing_keys:

2062

break

2063

# Loop over fallback repositories asking them for texts - ignore

2064

# any missing from a particular fallback.

2065

for record in source.get_record_stream(missing_keys,

2066

self._ordering, True):

2067

if record.storage_kind == 'absent':

2068

# Not in thie particular stream, may be in one of the

2069

# other fallback vfs objects.

2070

continue

2071

missing_keys.remove(record.key)

2072

yield record

2073

2074

if self._raw_record_map is None:

2075

raise AssertionError('_raw_record_map should have been filled')

2076

first = True

2077

for key in self.keys:

2078

if key in self.nonlocal_keys:

2079

continue

2080

yield LazyKnitContentFactory(key, self.global_map[key], self, first)

2081

first = False

2082

2083

def _get_one_work(self, requested_key):

2084

# Now, if we have calculated everything already, just return the

2085

# desired text.

2086

if requested_key in self._contents_map:

2087

return self._contents_map[requested_key]

2088

# To simplify things, parse everything at once - code that wants one text

2089

# probably wants them all.

2090

# FUTURE: This function could be improved for the 'extract many' case

2091

# by tracking each component and only doing the copy when the number of

2092

# children than need to apply delta's to it is > 1 or it is part of the

2093

# final output.

2094

multiple_versions = len(self.keys) != 1

2095

if self._record_map is None:

2096

self._record_map = self.vf._raw_map_to_record_map(

2097

self._raw_record_map)

2098

record_map = self._record_map

2099

# raw_record_map is key:

2100

# Have read and parsed records at this point.

2101

for key in self.keys:

2102

if key in self.nonlocal_keys:

2103

# already handled

2104

continue

780

2105

components = []

781

cursor = version_id

2106

cursor = key

782

2107

while cursor is not None:

783

method, data, digest, next = record_map[cursor]

784

components.append((cursor, method, data, digest))

785

if cursor in content_map:

2108

try:

2109

record, record_details, digest, next = record_map[cursor]

2110

except KeyError:

2111

raise RevisionNotPresent(cursor, self)

2112

components.append((cursor, record, record_details, digest))

2113

cursor = next

2114

if cursor in self._contents_map:

2115

# no need to plan further back

2116

components.append((cursor, None, None, None))

786

2117

break

787

cursor = next

788

2118

789

2119

content = None

790

for component_id, method, data, digest in reversed(components):

791

if component_id in content_map:

792

content = content_map[component_id]

2120

for (component_id, record, record_details,

2121

digest) in reversed(components):

2122

if component_id in self._contents_map:

2123

content = self._contents_map[component_id]

793

2124

else:

794

version_idx = self._index.lookup(component_id)

795

if method == 'fulltext':

796

assert content is None

797

content = self.factory.parse_fulltext(data, version_idx)

798

elif method == 'line-delta':

799

delta = self.factory.parse_line_delta(data, version_idx)

800

content = content.copy()

801

content._lines = self._apply_delta(content._lines,

802

delta)

803

content_map[component_id] = content

804

805

if 'no-eol' in self._index.get_options(version_id):

806

content = content.copy()

807

line = content._lines[-1][1].rstrip('\n')

808

content._lines[-1] = (content._lines[-1][0], line)

809

final_content[version_id] = content

2125

content, delta = self._factory.parse_record(key[-1],

2126

record, record_details, content,

2127

copy_base_content=multiple_versions)

2128

if multiple_versions:

2129

self._contents_map[component_id] = content

810

2130

811

2131

# digest here is the digest from the last applied component.

812

2132

text = content.text()

813

if sha_strings(text) != digest:

814

raise KnitCorrupt(self.filename,

815

'sha-1 does not match %s' % version_id)

816

817

text_map[version_id] = text

818

return text_map, final_content

819

820

def iter_lines_added_or_present_in_versions(self, version_ids=None,

821

pb=None):

822

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

823

if version_ids is None:

824

version_ids = self.versions()

825

if pb is None:

826

pb = progress.DummyProgress()

827

# we don't care about inclusions, the caller cares.

828

# but we need to setup a list of records to visit.

829

# we need version_id, position, length

830

version_id_records = []

831

requested_versions = set(version_ids)

832

# filter for available versions

833

for version_id in requested_versions:

834

if not self.has_version(version_id):

835

raise RevisionNotPresent(version_id, self.filename)

836

# get a in-component-order queue:

837

for version_id in self.versions():

838

if version_id in requested_versions:

839

data_pos, length = self._index.get_position(version_id)

840

version_id_records.append((version_id, data_pos, length))

841

842

total = len(version_id_records)

843

for version_idx, (version_id, data, sha_value) in \

844

enumerate(self._data.read_records_iter(version_id_records)):

845

pb.update('Walking content.', version_idx, total)

846

method = self._index.get_method(version_id)

847

version_idx = self._index.lookup(version_id)

848

assert method in ('fulltext', 'line-delta')

849

if method == 'fulltext':

850

content = self.factory.parse_fulltext(data, version_idx)

851

for line in content.text():

852

yield line

853

else:

854

delta = self.factory.parse_line_delta(data, version_idx)

855

for start, end, count, lines in delta:

856

for origin, line in lines:

857

yield line

858

pb.update('Walking content.', total, total)

859

860

def num_versions(self):

861

"""See VersionedFile.num_versions()."""

862

return self._index.num_versions()

863

864

__len__ = num_versions

865

866

def annotate_iter(self, version_id):

867

"""See VersionedFile.annotate_iter."""

868

content = self._get_content(version_id)

869

for origin, text in content.annotate_iter():

870

yield origin, text

871

872

def get_parents(self, version_id):

873

"""See VersionedFile.get_parents."""

874

# perf notes:

875

# optimism counts!

876

# 52554 calls in 1264 872 internal down from 3674

877

try:

878

return self._index.get_parents(version_id)

879

except KeyError:

880

raise RevisionNotPresent(version_id, self.filename)

881

882

def get_parents_with_ghosts(self, version_id):

883

"""See VersionedFile.get_parents."""

884

try:

885

return self._index.get_parents_with_ghosts(version_id)

886

except KeyError:

887

raise RevisionNotPresent(version_id, self.filename)

888

889

def get_ancestry(self, versions):

890

"""See VersionedFile.get_ancestry."""

891

if isinstance(versions, basestring):

892

versions = [versions]

893

if not versions:

894

return []

895

self._check_versions_present(versions)

896

return self._index.get_ancestry(versions)

897

898

def get_ancestry_with_ghosts(self, versions):

899

"""See VersionedFile.get_ancestry_with_ghosts."""

900

if isinstance(versions, basestring):

901

versions = [versions]

902

if not versions:

903

return []

904

self._check_versions_present(versions)

905

return self._index.get_ancestry_with_ghosts(versions)

906

907

#@deprecated_method(zero_eight)

908

def walk(self, version_ids):

909

"""See VersionedFile.walk."""

910

# We take the short path here, and extract all relevant texts

911

# and put them in a weave and let that do all the work. Far

912

# from optimal, but is much simpler.

913

# FIXME RB 20060228 this really is inefficient!

914

from bzrlib.weave import Weave

915

916

w = Weave(self.filename)

917

ancestry = self.get_ancestry(version_ids)

918

sorted_graph = topo_sort(self._index.get_graph())

919

version_list = [vid for vid in sorted_graph if vid in ancestry]

920

921

for version_id in version_list:

922

lines = self.get_lines(version_id)

923

w.add_lines(version_id, self.get_parents(version_id), lines)

924

925

for lineno, insert_id, dset, line in w.walk(version_ids):

926

yield lineno, insert_id, dset, line

927

928

def plan_merge(self, ver_a, ver_b):

929

"""See VersionedFile.plan_merge."""

930

ancestors_b = set(self.get_ancestry(ver_b))

931

def status_a(revision, text):

932

if revision in ancestors_b:

933

return 'killed-b', text

934

else:

935

return 'new-a', text

936

937

ancestors_a = set(self.get_ancestry(ver_a))

938

def status_b(revision, text):

939

if revision in ancestors_a:

940

return 'killed-a', text

941

else:

942

return 'new-b', text

943

944

annotated_a = self.annotate(ver_a)

945

annotated_b = self.annotate(ver_b)

946

plain_a = [t for (a, t) in annotated_a]

947

plain_b = [t for (a, t) in annotated_b]

948

blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

949

a_cur = 0

950

b_cur = 0

951

for ai, bi, l in blocks:

952

# process all mismatched sections

953

# (last mismatched section is handled because blocks always

954

# includes a 0-length last block)

955

for revision, text in annotated_a[a_cur:ai]:

956

yield status_a(revision, text)

957

for revision, text in annotated_b[b_cur:bi]:

958

yield status_b(revision, text)

959

960

# and now the matched section

961

a_cur = ai + l

962

b_cur = bi + l

963

for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):

964

assert text_a == text_b

965

yield "unchanged", text_a

966

967

968

class _KnitComponentFile(object):

969

"""One of the files used to implement a knit database"""

970

971

def __init__(self, transport, filename, mode, file_mode=None,

972

create_parent_dir=False, dir_mode=None):

973

self._transport = transport

974

self._filename = filename

975

self._mode = mode

976

self._file_mode = file_mode

977

self._dir_mode = dir_mode

978

self._create_parent_dir = create_parent_dir

979

self._need_to_create = False

980

981

def check_header(self, fp):

982

line = fp.readline()

983

if line != self.HEADER:

984

raise KnitHeaderError(badline=line)

985

986

def commit(self):

987

"""Commit is a nop."""

988

989

def __repr__(self):

990

return '%s(%s)' % (self.__class__.__name__, self._filename)

991

992

993

class _KnitIndex(_KnitComponentFile):

994

"""Manages knit index file.

995

996

The index is already kept in memory and read on startup, to enable

2133

actual_sha = sha_strings(text)

2134

if actual_sha != digest:

2135

raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)

2136

if multiple_versions:

2137

return self._contents_map[requested_key]

2138

else:

2139

return content

2140

2141

def _wire_bytes(self):

2142

"""Get the bytes to put on the wire for 'key'.

2143

2144

The first collection of bytes asked for returns the serialised

2145

raw_record_map and the additional details (key, parent) for key.

2146

Subsequent calls return just the additional details (key, parent).

2147

The wire storage_kind given for the first key is 'knit-delta-closure',

2148

For subsequent keys it is 'knit-delta-closure-ref'.

2149

2150

:param key: A key from the content generator.

2151

:return: Bytes to put on the wire.

2152

"""

2153

lines = []

2154

# kind marker for dispatch on the far side,

2155

lines.append('knit-delta-closure')

2156

# Annotated or not

2157

if self.vf._factory.annotated:

2158

lines.append('annotated')

2159

else:

2160

lines.append('')

2161

# then the list of keys

2162

lines.append('\t'.join(['\x00'.join(key) for key in self.keys

2163

if key not in self.nonlocal_keys]))

2164

# then the _raw_record_map in serialised form:

2165

map_byte_list = []

2166

# for each item in the map:

2167

# 1 line with key

2168

# 1 line with parents if the key is to be yielded (None: for None, '' for ())

2169

# one line with method

2170

# one line with noeol

2171

# one line with next ('' for None)

2172

# one line with byte count of the record bytes

2173

# the record bytes

2174

for key, (record_bytes, (method, noeol), next) in \

2175

self._raw_record_map.iteritems():

2176

key_bytes = '\x00'.join(key)

2177

parents = self.global_map.get(key, None)

2178

if parents is None:

2179

parent_bytes = 'None:'

2180

else:

2181

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

2182

method_bytes = method

2183

if noeol:

2184

noeol_bytes = "T"

2185

else:

2186

noeol_bytes = "F"

2187

if next:

2188

next_bytes = '\x00'.join(next)

2189

else:

2190

next_bytes = ''

2191

map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (

2192

key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,

2193

len(record_bytes), record_bytes))

2194

map_bytes = ''.join(map_byte_list)

2195

lines.append(map_bytes)

2196

bytes = '\n'.join(lines)

2197

return bytes

2198

2199

2200

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
                 global_map=None, raw_record_map=None, ordering='unordered'):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for.  Any iterable,
            including a one-shot iterator, is accepted.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: A unparsed raw record map to use for answering
            contents.  When None it is fetched from versioned_files.
        :param ordering: Passed through to _ContentMapGenerator.__init__.
        """
        _ContentMapGenerator.__init__(self, ordering=ordering)
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list: 'keys' itself may be an iterator
            # that list(keys) above has already exhausted.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2244

2245

2246

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator sourced from a network stream."""

    def __init__(self, bytes, line_end):
        """Construct a _NetworkContentMapGenerator from a bytes block.

        :param bytes: The serialised content map.
        :param line_end: Offset in bytes at which parsing starts (the
            'annotated' line is read from there).
        """
        self._bytes = bytes
        self.global_map = {}
        self._raw_record_map = {}
        self._contents_map = {}
        self._record_map = None
        self.nonlocal_keys = []
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)
        read_line = self._read_line
        # Annotated or not
        line, start = read_line(bytes, line_end)
        if line == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # list of keys to emit in get_record_stream
        line, start = read_line(bytes, start)
        self.keys = [
            tuple(segment.split('\x00')) for segment in line.split('\t')
            if segment]
        # now a loop until the end. XXX: It would be nice if this was just a
        # bunch of the same records as get_record_stream(..., False) gives, but
        # there is a decent sized gap stopping that at the moment.
        end = len(bytes)
        while start < end:
            # 1 line with key
            line, start = read_line(bytes, start)
            key = tuple(line.split('\x00'))
            # 1 line with parents (None: for None, '' for ())
            line, start = read_line(bytes, start)
            if line == 'None:':
                parents = None
            else:
                parents = tuple(
                    [tuple(segment.split('\x00'))
                     for segment in line.split('\t') if segment])
            self.global_map[key] = parents
            # one line with method
            method, start = read_line(bytes, start)
            # one line with noeol
            line, start = read_line(bytes, start)
            noeol = line == "T"
            # one line with next ('' for None)
            line, start = read_line(bytes, start)
            if not line:
                next_key = None
            else:
                next_key = tuple(line.split('\x00'))
            # one line with byte count of the record bytes
            line, start = read_line(bytes, start)
            count = int(line)
            # the record bytes (length delimited, not newline terminated)
            record_bytes = bytes[start:start+count]
            start = start + count
            # put it in the map
            self._raw_record_map[key] = (
                record_bytes, (method, noeol), next_key)

    @staticmethod
    def _read_line(data, start):
        """Return (line, next_start) for the line of data beginning at start.

        The line excludes its newline; next_start is the offset just past
        that newline.
        """
        newline_pos = data.find('\n', start)
        return data[start:newline_pos], newline_pos + 1

    def get_record_stream(self):
        """Get a record stream for the keys requested by the bytestream."""
        for position, key in enumerate(self.keys):
            # Only the first factory is flagged as first.
            yield LazyKnitContentFactory(
                key, self.global_map[key], self, position == 0)

    def _wire_bytes(self):
        """Return the bytes this generator was constructed from."""
        return self._bytes

2333

2334

2335

class _KndxIndex(object):

2336

"""Manages knit index files

2337

2338

The index is kept in memory and read on startup, to enable

997

2339

fast lookups of revision information. The cursor of the index

998

2340

file is always pointing to the end, making it easy to append

999

2341

entries.

1009

2351

1010

2352

Duplicate entries may be written to the index for a single version id

1011

2353

if this is done then the latter one completely replaces the former:

1012

this allows updates to correct version and parent information.

2354

this allows updates to correct version and parent information.

1013

2355

Note that the two entries may share the delta, and that successive

1014

2356

annotations and references MUST point to the first entry.

1015

2357

1016

2358

The index file on disc contains a header, followed by one line per knit

1017

2359

record. The same revision can be present in an index file more than once.

1018

The first occurrence gets assigned a sequence number starting from 0.

1019

2360

The first occurrence gets assigned a sequence number starting from 0.

2361

1020

2362

The format of a single line is

1021

2363

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1022

2364

REVISION_ID is a utf8-encoded revision id

1023

FLAGS is a comma separated list of flags about the record. Values include

2365

FLAGS is a comma separated list of flags about the record. Values include

1024

2366

no-eol, line-delta, fulltext.

1025

2367

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1026

that the the compressed data starts at.

2368

that the compressed data starts at.

1027

2369

LENGTH is the ascii representation of the length of the data file.

1028

2370

PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of

1029

2371

REVISION_ID.

1030

2372

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1031

2373

revision id already in the knit that is a parent of REVISION_ID.

1032

2374

The ' :' marker is the end of record marker.

1033

2375

1034

2376

partial writes:

1035

when a write is interrupted to the index file, it will result in a line that

1036

does not end in ' :'. If the ' :' is not present at the end of a line, or at

1037

the end of the file, then the record that is missing it will be ignored by

1038

the parser.

2377

when a write is interrupted to the index file, it will result in a line

2378

that does not end in ' :'. If the ' :' is not present at the end of a line,

2379

or at the end of the file, then the record that is missing it will be

2380

ignored by the parser.

1039

2381

1040

2382

When writing new records to the index file, the data is preceded by '\n'

1041

2383

to ensure that records always start on new lines even if the last write was

1042

2384

interrupted. As a result it's normal for the last line in the index to be

1043

2385

missing a trailing newline. One can be added with no harmful effects.

2386

2387

:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,

2388

where prefix is e.g. the (fileid,) for .texts instances or () for

2389

constant-mapped things like .revisions, and the old state is

2390

tuple(history_vector, cache_dict). This is used to prevent having an

2391

ABI change with the C extension that reads .kndx files.

1044

2392

"""

1045

2393

1046

2394

HEADER = "# bzr knit index 8\n"

1047

2395

1048

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

1049

# __slots__ = ['_cache', '_history', '_transport', '_filename']

1050

1051

def _cache_version(self, version_id, options, pos, size, parents):

2396

def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):

2397

"""Create a _KndxIndex on transport using mapper."""

2398

self._transport = transport

2399

self._mapper = mapper

2400

self._get_scope = get_scope

2401

self._allow_writes = allow_writes

2402

self._is_locked = is_locked

2403

self._reset_cache()

2404

self.has_graph = True

2405

2406

def add_records(self, records, random_id=False, missing_compression_parents=False):
    """Add multiple records to the index.

    Records are grouped by the .kndx file they belong to and written one
    file at a time, in sorted path order.  If anything fails while a file
    is being processed, the in-memory cache for that prefix is restored
    and the error re-raised.

    :param records: a list of tuples:
                     (key, options, access_memo, parents).
    :param random_id: Unused by this implementation.
    :param missing_compression_parents: If True, RevisionNotPresent is
        raised for the keys of records and nothing is added.
    """
    if missing_compression_parents:
        # It might be nice to get the edge of the records. But keys isn't
        # _wrong_.
        keys = sorted(record[0] for record in records)
        raise errors.RevisionNotPresent(keys, self)
    # path -> (prefix, [records stored under that path])
    by_path = {}
    for record in records:
        key = record[0]
        prefix = key[:-1]
        path = self._mapper.map(key) + '.kndx'
        by_path.setdefault(path, (prefix, []))[1].append(record)
    for path in sorted(by_path):
        prefix, prefix_records = by_path[path]
        self._load_prefixes([prefix])
        new_lines = []
        # Snapshot the state so that a failure can be rolled back.
        saved_history = self._kndx_cache[prefix][1][:]
        saved_cache = self._kndx_cache[prefix][0].copy()

        try:
            for key, options, (_, pos, size), parents in prefix_records:
                if parents is None:
                    # kndx indices cannot be parentless.
                    parents = ()
                line = "\n%s %s %s %s %s :" % (
                    key[-1], ','.join(options), pos, size,
                    self._dictionary_compress(parents))
                if type(line) is not str:
                    raise AssertionError(
                        'data must be utf8 was %s' % type(line))
                new_lines.append(line)
                self._cache_key(key, options, pos, size, parents)
            if len(saved_history):
                # The prefix already had entries: append to its file.
                self._transport.append_bytes(path, ''.join(new_lines))
            else:
                # Nothing was known for this prefix: write header + lines.
                self._init_index(path, new_lines)
        except:
            # If any problems happen, restore the original values and re-raise
            self._kndx_cache[prefix] = (saved_cache, saved_history)
            raise

2458

2459

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    :raises NotImplementedError: always.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)

2464

2465

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    :raises NotImplementedError: always.
    """
    # Because kndx files do not support atomic insertion via separate index
    # files, they do not support this method.
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)

2470

2471

def _cache_key(self, key, options, pos, size, parent_keys):

1052

2472

"""Cache a version record in the history array and index cache.

1053

1054

This is inlined into __init__ for performance. KEEP IN SYNC.

2473

2474

This is inlined into _load_data for performance. KEEP IN SYNC.

1055

2475

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1056

2476

indexes).

1057

2477

"""

2478

prefix = key[:-1]

2479

version_id = key[-1]

2480

# last-element only for compatibility with the C load_data.

2481

parents = tuple(parent[-1] for parent in parent_keys)

2482

for parent in parent_keys:

2483

if parent[:-1] != prefix:

2484

raise ValueError("mismatched prefixes for %r, %r" % (

2485

key, parent_keys))

2486

cache, history = self._kndx_cache[prefix]

1058

2487

# only want the _history index to reference the 1st index entry

1059

2488

# for version_id

1060

if version_id not in self._cache:

1061

index = len(self._history)

1062

self._history.append(version_id)

2489

if version_id not in cache:

2490

index = len(history)

2491

history.append(version_id)

1063

2492

else:

1064

index = self._cache[version_id][5]

1065

self._cache[version_id] = (version_id,

2493

index = cache[version_id][5]

2494

cache[version_id] = (version_id,

1066

2495

options,

1067

2496

pos,

1068

2497

size,

1069

2498

parents,

1070

2499

index)

1071

2500

1072

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1073

create_parent_dir=False, delay_create=False, dir_mode=None):

1074

_KnitComponentFile.__init__(self, transport, filename, mode,

1075

file_mode=file_mode,

1076

create_parent_dir=create_parent_dir,

1077

dir_mode=dir_mode)

1078

self._cache = {}

1079

# position in _history is the 'official' index for a revision

1080

# but the values may have come from a newer entry.

1081

# so - wc -l of a knit index is != the number of unique names

1082

# in the knit.

1083

self._history = []

1084

decode_utf8 = cache_utf8.decode

1085

pb = bzrlib.ui.ui_factory.nested_progress_bar()

2501

def check_header(self, fp):
    """Check that fp starts with the knit index header line.

    :param fp: File-like object positioned at the start of the index.
    :raises errors.NoSuchFile: if the file is empty.
    :raises KnitHeaderError: if the first line is not self.HEADER.
    """
    first_line = fp.readline()
    if first_line == '':
        # An empty file can actually be treated as though the file doesn't
        # exist yet.
        raise errors.NoSuchFile(self)
    elif first_line != self.HEADER:
        raise KnitHeaderError(badline=first_line, filename=self)

2509

2510

def _check_read(self):

2511

if not self._is_locked():

2512

raise errors.ObjectNotLocked(self)

2513

if self._get_scope() != self._scope:

2514

self._reset_cache()

2515

2516

def _check_write_ok(self):

2517

"""Assert if not writes are permitted."""

2518

if not self._is_locked():

2519

raise errors.ObjectNotLocked(self)

2520

if self._get_scope() != self._scope:

2521

self._reset_cache()

2522

if self._mode != 'w':

2523

raise errors.ReadOnlyObjectDirtiedError(self)

2524

2525

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for keys.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.  It is materialised once, so a
        one-shot iterator is fine.
    :return: A dict of key:(index_memo, compression_parent, parents,
        record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    # keys is walked twice (by get_parent_map and by the loop below); with
    # an iterator the second walk would silently see nothing.
    keys = list(keys)
    parent_map = self.get_parent_map(keys)
    result = {}
    for key in keys:
        if key not in parent_map:
            continue # Ghost
        method = self.get_method(key)
        parents = parent_map[key]
        if method == 'fulltext':
            compression_parent = None
        else:
            # Anything that is not a fulltext is built on the first parent.
            compression_parent = parents[0]
        noeol = 'no-eol' in self.get_options(key)
        index_memo = self.get_position(key)
        result[key] = (index_memo, compression_parent,
                       parents, (method, noeol))
    return result

2560

2561

def get_method(self, key):
    """Return compression method of specified key.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both options
        are present.
    :raises errors.KnitIndexUnknownMethod: if neither option is present.
    """
    options = self.get_options(key)
    for method in ('fulltext', 'line-delta'):
        if method in options:
            return method
    raise errors.KnitIndexUnknownMethod(self, options)

2570

2571

def get_options(self, key):

2572

"""Return a list representing options.

2573

2574

e.g. ['foo', 'bar']

2575

"""

2576

prefix, suffix = self._split_key(key)

2577

self._load_prefixes([prefix])

1086

2578

try:

1087

count = 0

1088

total = 1

1089

try:

1090

pb.update('read knit index', count, total)

1091

fp = self._transport.get(self._filename)

2579

return self._kndx_cache[prefix][0][suffix][1]

2580

except KeyError:

2581

raise RevisionNotPresent(key, self)

2582

2583

def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()

    :param keys: An iterable of keys to start from (a one-shot iterator
        is fine; it is materialised once).
    :return: (parent_map, missing_keys).  parent_map maps every reachable
        key that has an index entry to a tuple of its parent keys;
        missing_keys is the set of reached keys without an entry.
    """
    # keys is needed twice (prefixes and the work list), so take a private
    # list; it doubles as the work list because nobody else can see it.
    pending_keys = list(keys)
    prefixes = set(key[:-1] for key in pending_keys)
    self._load_prefixes(prefixes)
    parent_map = {}
    missing_keys = set()
    # This assumes that keys will not reference parents in a different
    # prefix, which is accurate so far.
    while pending_keys:
        key = pending_keys.pop()
        if key in parent_map:
            continue
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            missing_keys.add(key)
        else:
            parent_keys = tuple([prefix + (suffix,)
                                 for suffix in suffix_parents])
            parent_map[key] = parent_keys
            pending_keys.extend([p for p in parent_keys
                                 if p not in parent_map])
    return parent_map, missing_keys

2609

2610

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.  Any iterable is
        accepted, including a one-shot iterator.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # keys is walked twice below; materialise it so an iterator is not
    # already exhausted by the time of the second walk.
    keys = list(keys)
    # Parse what we need to up front, this potentially trades off I/O
    # locality (.kndx and .knit in the same block group for the same file
    # id) for less checking in inner loops.
    prefixes = set(key[:-1] for key in keys)
    self._load_prefixes(prefixes)
    result = {}
    for key in keys:
        prefix = key[:-1]
        try:
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            # No entry for this key: leave it out of the result.
            pass
        else:
            result[key] = tuple(prefix + (suffix,) for
                                suffix in suffix_parents)
    return result

2633

2634

def get_position(self, key):
    """Return details needed to access the version.

    :return: a tuple (key, data position, size) to hand to the access
        logic to get the record.
    """
    prefix, suffix = self._split_key(key)
    self._load_prefixes([prefix])
    cache = self._kndx_cache[prefix][0]
    # Cache entries hold the position and size at indexes 2 and 3.
    _, _, pos, size = cache[suffix][:4]
    return key, pos, size

2644

2645

has_key = _mod_index._has_key_from_parent_map

2646

2647

def _init_index(self, path, extra_lines=[]):

2648

"""Initialize an index."""

2649

sio = StringIO()

2650

sio.write(self.HEADER)

2651

sio.writelines(extra_lines)

2652

sio.seek(0)

2653

self._transport.put_file_non_atomic(path, sio,

2654

create_parent_dir=True)

2655

# self._create_parent_dir)

2656

# mode=self._file_mode,

2657

# dir_mode=self._dir_mode)

2658

2659

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A set of key tuples.
    """
    # Identify all key prefixes.
    # XXX: A bit hacky, needs polish.
    if type(self._mapper) is ConstantMapper:
        prefixes = [()]
    else:
        # Strip the extension from every file on the transport and unmap
        # each distinct path back to a prefix.
        relpaths = set(
            os.path.splitext(quoted_relpath)[0]
            for quoted_relpath in self._transport.iter_files_recursive())
        prefixes = [self._mapper.unmap(path) for path in relpaths]
    self._load_prefixes(prefixes)
    result = set()
    for prefix in prefixes:
        history = self._kndx_cache[prefix][1]
        result.update(prefix + (suffix,) for suffix in history)
    return result

2680

2681

def _load_prefixes(self, prefixes):

2682

"""Load the indices for prefixes."""

2683

self._check_read()

2684

for prefix in prefixes:

2685

if prefix not in self._kndx_cache:

2686

# the load_data interface writes to these variables.

2687

self._cache = {}

2688

self._history = []

2689

self._filename = prefix

1092

2690

try:

1093

self.check_header(fp)

1094

# readlines reads the whole file at once:

1095

# bad for transports like http, good for local disk

1096

# we save 60 ms doing this one change (

1097

# from calling readline each time to calling

1098

# readlines once.

1099

# probably what we want for nice behaviour on

1100

# http is a incremental readlines that yields, or

1101

# a check for local vs non local indexes,

1102

for l in fp.readlines():

1103

rec = l.split()

1104

if len(rec) < 5 or rec[-1] != ':':

1105

# corrupt line.

1106

# FIXME: in the future we should determine if its a

1107

# short write - and ignore it

1108

# or a different failure, and raise. RBC 20060407

1109

continue

1110

count += 1

1111

total += 1

1112

#pb.update('read knit index', count, total)

1113

# See self._parse_parents

1114

parents = []

1115

for value in rec[4:-1]:

1116

if '.' == value[0]:

1117

# uncompressed reference

1118

parents.append(decode_utf8(value[1:]))

1119

else:

1120

# this is 15/4000ms faster than isinstance,

1121

# (in lsprof)

1122

# this function is called thousands of times a

1123

# second so small variations add up.

1124

assert value.__class__ is str

1125

parents.append(self._history[int(value)])

1126

# end self._parse_parents

1127

# self._cache_version(decode_utf8(rec[0]),

1128

# rec[1].split(','),

1129

# int(rec[2]),

1130

# int(rec[3]),

1131

# parents)

1132

# --- self._cache_version

1133

# only want the _history index to reference the 1st

1134

# index entry for version_id

1135

version_id = decode_utf8(rec[0])

1136

if version_id not in self._cache:

1137

index = len(self._history)

1138

self._history.append(version_id)

1139

else:

1140

index = self._cache[version_id][5]

1141

self._cache[version_id] = (version_id,

1142

rec[1].split(','),

1143

int(rec[2]),

1144

int(rec[3]),

1145

parents,

1146

index)

1147

# --- self._cache_version

1148

finally:

1149

fp.close()

1150

except NoSuchFile, e:

1151

if mode != 'w' or not create:

1152

raise

1153

if delay_create:

1154

self._need_to_create = True

1155

else:

1156

self._transport.put_bytes_non_atomic(self._filename,

1157

self.HEADER, mode=self._file_mode)

1158

1159

finally:

1160

pb.update('read knit index', total, total)

1161

pb.finished()

1162

1163

def _parse_parents(self, compressed_parents):

1164

"""convert a list of string parent values into version ids.

1165

1166

ints are looked up in the index.

1167

.FOO values are ghosts and converted in to FOO.

1168

1169

NOTE: the function is retained here for clarity, and for possible

1170

use in partial index reads. However bulk processing now has

1171

it inlined in __init__ for inner-loop optimisation.

2691

path = self._mapper.map(prefix) + '.kndx'

2692

fp = self._transport.get(path)

2693

try:

2694

# _load_data may raise NoSuchFile if the target knit is

2695

# completely empty.

2696

_load_data(self, fp)

2697

finally:

2698

fp.close()

2699

self._kndx_cache[prefix] = (self._cache, self._history)

2700

del self._cache

2701

del self._filename

2702

del self._history

2703

except NoSuchFile:

2704

self._kndx_cache[prefix] = ({}, [])

2705

if type(self._mapper) is ConstantMapper:

2706

# preserve behaviour for revisions.kndx etc.

2707

self._init_index(path)

2708

del self._cache

2709

del self._filename

2710

del self._history

2711

2712

missing_keys = _mod_index._missing_keys_from_parent_map

2713

2714

def _partition_keys(self, keys):

2715

"""Turn keys into a dict of prefix:suffix_list."""

2716

result = {}

2717

for key in keys:

2718

prefix_keys = result.setdefault(key[:-1], [])

2719

prefix_keys.append(key[-1])

2720

return result

2721

2722

def _dictionary_compress(self, keys):

2723

"""Dictionary compress keys.

2724

2725

:param keys: The keys to generate references to.

2726

:return: A string representation of keys. keys which are present are

2727

dictionary compressed, and others are emitted as fulltext with a

2728

'.' prefix.

1172

2729

"""

1173

result = []

1174

for value in compressed_parents:

1175

if value[-1] == '.':

1176

# uncompressed reference

1177

result.append(cache_utf8.decode_utf8(value[1:]))

1178

else:

1179

# this is 15/4000ms faster than isinstance,

1180

# this function is called thousands of times a

1181

# second so small variations add up.

1182

assert value.__class__ is str

1183

result.append(self._history[int(value)])

1184

return result

1185

1186

def get_graph(self):
    """Return a list of (version_id, parents) for every cached version."""
    return [(version_id, entry[4])
            for version_id, entry in self._cache.iteritems()]

1191

1192

def get_ancestry(self, versions):
    """See VersionedFile.get_ancestry.

    Parents without an entry in the index (ghosts) are left out.

    :raises KeyError: if one of versions is not in the index.
    """
    cache = self._cache
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        # trim ghosts
        present = [parent for parent in cache[version][4]
                   if parent in cache]
        # queue the parents that have not been completed yet
        todo.update(parent for parent in present if parent not in graph)
        graph[version] = present
    return topo_sort(graph.items())

1209

1210

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    Versions without an entry in the index (ghosts) are kept, with an
    empty parent list.
    """
    cache = self._cache
    # get a graph of all the mentioned versions:
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        if version not in cache:
            # ghost, fake it
            graph[version] = []
            continue
        parents = cache[version][4]
        todo.update(parent for parent in parents if parent not in graph)
        graph[version] = parents
    return topo_sort(graph.items())

1230

1231

def num_versions(self):
    """Return the number of entries in the history list."""
    history = self._history
    return len(history)

1233

1234

__len__ = num_versions

1235

1236

def get_versions(self):
    """Return the history list itself (not a copy)."""
    history = self._history
    return history

1238

1239

def idx_to_name(self, idx):
    """Return the version id stored at position idx of the history."""
    history = self._history
    return history[idx]

1241

1242

def lookup(self, version_id):
    """Return the sequence number recorded for version_id.

    version_id must already be in the index.
    """
    cache = self._cache
    assert version_id in cache
    # The sequence number is the last field of a cache entry.
    return cache[version_id][5]

1245

1246

def _version_list_to_index(self, versions):

1247

encode_utf8 = cache_utf8.encode

2730

if not keys:

2731

return ''

1248

2732

result_list = []

1249

for version in versions:

1250

if version in self._cache:

2733

prefix = keys[0][:-1]

2734

cache = self._kndx_cache[prefix][0]

2735

for key in keys:

2736

if key[:-1] != prefix:

2737

# kndx indices cannot refer across partitioned storage.

2738

raise ValueError("mismatched prefixes for %r" % keys)

2739

if key[-1] in cache:

1251

2740

# -- inlined lookup() --

1252

result_list.append(str(self._cache[version][5]))

2741

result_list.append(str(cache[key[-1]][5]))

1253

2742

# -- end lookup () --

1254

2743

else:

1255

result_list.append('.' + encode_utf8(version))

2744

result_list.append('.' + key[-1])

1256

2745

return ' '.join(result_list)

1257

2746

1258

def add_version(self, version_id, options, pos, size, parents):
    """Add a version record to the index.

    Convenience wrapper that hands a single record to add_versions().
    """
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1261

1262

def add_versions(self, versions):
    """Add multiple versions to the index.

    The in-memory history and cache are restored, and the error re-raised,
    if anything goes wrong.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
    """
    encode_utf8 = cache_utf8.encode
    new_lines = []
    # Snapshot the state so that a failure can be rolled back.
    saved_history = self._history[:]
    saved_cache = self._cache.copy()

    try:
        for version_id, options, pos, size, parents in versions:
            line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),
                                           ','.join(options),
                                           pos,
                                           size,
                                           self._version_list_to_index(parents))
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            new_lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if self._need_to_create:
            # Creation was deferred: write the header and the new lines
            # in one go.
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(new_lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(new_lines))
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = saved_history
        self._cache = saved_cache
        raise

1301

1302

def has_version(self, version_id):
    """Return True when version_id has an entry in the index cache."""
    known_versions = self._cache
    return version_id in known_versions

1305

1306

def get_position(self, version_id):
    """Return the (position, size) pair cached for version_id.

    These are fields 2 and 3 of the cached entry.  A KeyError propagates
    when version_id is not cached.
    """
    entry = self._cache[version_id]
    return (entry[2], entry[3])

1310

1311

def get_method(self, version_id):

1312

"""Return compression method of specified version."""

1313

options = self._cache[version_id][1]

1314

if 'fulltext' in options:

2747

def _reset_cache(self):
    """Drop all parsed kndx data and recompute the scope and access mode."""
    # Possibly this should be a LRU cache. A dictionary from key_prefix to
    # (cache_dict, history_vector) for parsed kndx files.
    self._kndx_cache = {}
    self._scope = self._get_scope()
    # 'w' when writes are currently allowed, 'r' otherwise.
    self._mode = 'w' if self._allow_writes() else 'r'

2757

2758

def _sort_keys_by_io(self, keys, positions):
    """Order keys so their records can be read with the least seeking.

    Keys are grouped by prefix and, within a prefix, ordered by position.

    :param keys: A list of keys whose records we want to read. It is
        sorted in place.
    :param positions: A dict, such as the one returned by
        _get_components_positions(); positions[key][1] is the index_memo.
    :return: None
    """
    def io_order(key):
        memo = positions[key][1]
        # memo[0] is the key, i.e. (file_id, revision_id): only the prefix
        # takes part in the ordering, not the revision_id.  memo[1] is the
        # position.  memo[2] (the size) is irrelevant for the sort.
        prefix = memo[0][:-1]
        return prefix, memo[1]
    keys.sort(key=io_order)
    return None

2777

2778

# Re-bind the _get_total_build_size visible from the enclosing scope under
# the same name here (presumably so it is reachable as an attribute of this
# class; the class header is outside this view -- confirm).
_get_total_build_size = _get_total_build_size

2779

2780

def _split_key(self, key):
    """Return (prefix, suffix): everything but the last element, and the last element."""
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix

2783

2784

2785

class _KnitGraphIndex(object):

2786

"""A KnitVersionedFiles index layered on GraphIndex."""

2787

2788

def __init__(self, graph_index, is_locked, deltas=False, parents=True,
             add_callback=None, track_external_parent_refs=False):
    """Construct a KnitGraphIndex on a graph_index.

    :param graph_index: An implementation of bzrlib.index.GraphIndex.
    :param is_locked: A callback, returns True if the index is locked and
        thus usable; it is consulted before queries are answered.
    :param deltas: Allow delta-compressed records.
    :param parents: If True, record knits parents, if not do not record
        parents.
    :param add_callback: If not None, allow additions to the index and call
        this callback with a list of added GraphIndex nodes:
        [(node, value, node_refs), ...]
    :param track_external_parent_refs: If True, record all external parent
        references parents from added records.  These can be retrieved
        later by calling get_missing_parents().
    :raises KnitCorrupt: if deltas is requested without parents.
    """
    self._add_callback = add_callback
    self._graph_index = graph_index
    self._deltas = deltas
    self._parents = parents
    if deltas and not parents:
        # XXX: TODO: Delta tree and parent graph should be conceptually
        # separate.
        raise KnitCorrupt(self, "Cannot do delta compression without "
                                "parent tracking.")
    self.has_graph = parents
    self._is_locked = is_locked
    self._missing_compression_parents = set()
    # Only allocate the reference tracker when the caller asked for it.
    self._key_dependencies = (
        _KeyRefs() if track_external_parent_refs else None)

2823

2824

def __repr__(self):
    """Render as ClassName(<repr of the wrapped graph index>)."""
    class_name = self.__class__.__name__
    return "%s(%r)" % (class_name, self._graph_index)

2826

2827

def add_records(self, records, random_id=False,
    missing_compression_parents=False):
    """Add multiple records to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param records: a list of tuples:
        (key, options, access_memo, parents).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :param missing_compression_parents: If True, the compression parent of
        every line-delta record is noted as possibly missing; parents that
        are themselves among the keys being added are dropped again before
        the remainder is merged into this index's set of missing
        compression parents.
    :raises errors.ReadOnlyError: if no add_callback was supplied.
    :raises KnitCorrupt: for a line-delta in a non-delta index, parents in
        a parentless index, or a key that already exists with different
        details.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    pending = {}
    compression_parents = set()
    key_dependencies = self._key_dependencies
    for (key, options, access_memo, parents) in records:
        if self._parents:
            parents = tuple(parents)
            if key_dependencies is not None:
                key_dependencies.add_references(key, parents)
        index, pos, size = access_memo
        # Value layout: one flag character ('N' = no-eol, ' ' otherwise)
        # followed by "<pos> <size>".
        flag = 'N' if 'no-eol' in options else ' '
        value = flag + "%d %d" % (pos, size)
        is_delta = 'line-delta' in options
        if is_delta and not self._deltas:
            raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
        if not self._parents:
            if parents:
                raise KnitCorrupt(self, "attempt to add node with parents "
                    "in parentless index.")
            node_refs = ()
        elif not self._deltas:
            node_refs = (parents, )
        elif is_delta:
            # The first parent doubles as the compression parent.
            node_refs = (parents, (parents[0],))
            if missing_compression_parents:
                compression_parents.add(parents[0])
        else:
            node_refs = (parents, ())
        pending[key] = (value, node_refs)
    # check for dups
    if not random_id:
        for (index, key, value, node_refs) in self._get_entries(pending):
            parents = node_refs[:1]
            # Sometimes these are passed as a list rather than a tuple
            passed = static_tuple.as_tuples(pending[key])
            passed_parents = passed[1][:1]
            if (value[0] != pending[key][0][0] or
                parents != passed_parents):
                node_refs = static_tuple.as_tuples(node_refs)
                raise KnitCorrupt(self, "inconsistent details in add_records"
                    ": %s %s" % ((value, node_refs), passed))
            # Already present with matching details: nothing to add.
            del pending[key]
    if self._parents:
        result = [(key, value, node_refs)
                  for key, (value, node_refs) in pending.iteritems()]
    else:
        result = [(key, value)
                  for key, (value, node_refs) in pending.iteritems()]
    self._add_callback(result)
    if missing_compression_parents:
        # This may appear to be incorrect (it does not check for
        # compression parents that are in the existing graph index),
        # but such records won't have been buffered, so this is
        # actually correct: every entry when
        # missing_compression_parents==True either has a missing parent, or
        # a parent that is one of the keys in records.
        compression_parents.difference_update(pending)
        self._missing_compression_parents.update(compression_parents)
    # Adding records may have satisfied missing compression parents.
    self._missing_compression_parents.difference_update(pending)

2916

2917

def scan_unvalidated_index(self, graph_index):
    """Inform this _KnitGraphIndex that there is an unvalidated index.

    This allows this _KnitGraphIndex to keep track of any missing
    compression parents we may want to have filled in to make those
    indices valid.

    :param graph_index: A GraphIndex
    """
    if self._deltas:
        # References in list 1 that graph_index does not hold itself,
        # minus whatever this index can already resolve.
        unresolved = graph_index.external_references(ref_list_num=1)
        resolvable = self.get_parent_map(unresolved)
        unresolved.difference_update(resolvable)
        self._missing_compression_parents.update(unresolved)
    dependencies = self._key_dependencies
    if dependencies is None:
        return
    # Add parent refs from graph_index (and discard parent refs that
    # the graph_index has).
    for node in graph_index.iter_all_entries():
        dependencies.add_references(node[1], node[3][0])

2935

2936

def get_missing_compression_parents(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or a index was scanned that had missing basis texts.

    :return: an immutable snapshot (frozenset) of the tracked keys.
    """
    missing = self._missing_compression_parents
    return frozenset(missing)

2943

2944

def get_missing_parents(self):
    """Return the keys of missing parents."""
    # If updating this, you should also update
    # groupcompress._GCGraphIndex.get_missing_parents
    tracker = self._key_dependencies
    satisfy = tracker.satisfy_refs_for_keys
    # We may have false positives, so filter those out: anything the index
    # can already resolve is marked as satisfied first.
    candidates = tracker.get_unsatisfied_refs()
    satisfy(self.get_parent_map(candidates))
    return frozenset(tracker.get_unsatisfied_refs())

2952

2953

def _check_read(self):
    """Raise errors.ObjectNotLocked unless the is_locked callback says we are locked."""
    if self._is_locked():
        return
    raise errors.ObjectNotLocked(self)

2957

2958

def _check_write_ok(self):
    """Raise errors.ObjectNotLocked unless the is_locked callback says we are locked.

    The check performed is the same one _check_read() makes.
    """
    if self._is_locked():
        return
    raise errors.ObjectNotLocked(self)

2962

2963

def _compression_parent(self, an_entry):
    """Return the key that an_entry is compressed against, or None.

    :param an_entry: an index entry whose element [3] holds the reference
        lists; the second list carries the compression parent (deltas
        implies parents currently).
    :raises AssertionError: if more than one compression parent is listed.
    """
    # Grab the second parent list (as deltas implies parents currently)
    compression_parents = an_entry[3][1]
    if not compression_parents:
        return None
    if len(compression_parents) != 1:
        # Wrap in a 1-tuple: formatting a multi-element tuple directly with
        # a single %r raises TypeError and hides the real problem.
        raise AssertionError(
            "Too many compression parents: %r" % (compression_parents,))
    return compression_parents[0]

2973

2974

def get_build_details(self, keys):
    """Get the method, index_memo and compression parent for version_ids.

    Ghosts are omitted from the result.

    :param keys: An iterable of keys.
    :return: A dict of key:
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data
        compression_parent
            Content that this record is built upon, may be None
        parents
            Logical parents of this node
        record_details
            extra information about the content which needs to be passed to
            Factory.parse_record
    """
    self._check_read()
    details = {}
    for entry in self._get_entries(keys, False):
        if self._parents:
            parents = entry[3][0]
        else:
            parents = ()
        if self._deltas:
            basis_key = self._compression_parent(entry)
        else:
            basis_key = None
        # The first character of the value is the no-eol flag.
        noeol = (entry[2][0] == 'N')
        method = 'line-delta' if basis_key else 'fulltext'
        details[entry[1]] = (self._node_to_position(entry),
                             basis_key, parents,
                             (method, noeol))
    return details

3015

3016

def _get_entries(self, keys, check_present=False):
    """Get the entries for keys.

    A generator of (index, key, value, refs) nodes.  For a parentless index
    the three-element nodes are padded with an empty refs tuple.

    :param keys: An iterable of index key tuples.
    :param check_present: If True, raise RevisionNotPresent for one of the
        requested keys that the graph index did not return, once all found
        entries have been yielded.
    """
    wanted = set(keys)
    seen = set()
    # adapt parentless index to the rest of the code.
    pad_refs = not self._parents
    for node in self._graph_index.iter_entries(wanted):
        if pad_refs:
            yield node[0], node[1], node[2], ()
        else:
            yield node
        seen.add(node[1])
    if check_present:
        absent = wanted.difference(seen)
        if absent:
            raise RevisionNotPresent(absent.pop(), self)

3036

3037

def get_method(self, key):
    """Return compression method of specified key."""
    node = self._get_node(key)
    return self._get_method(node)

3040

3041

def _get_method(self, node):

3042

if not self._deltas:

1315

3043

return 'fulltext'

1316

else:

1317

assert 'line-delta' in options

3044

if self._compression_parent(node):

1318

3045

return 'line-delta'

1319

1320

def get_options(self, version_id):
    """Return the options (field 1 of the cached entry) for version_id."""
    entry = self._cache[version_id]
    return entry[1]

1322

1323

def get_parents(self, version_id):
    """Return parents of specified version ignoring ghosts.

    A parent that has no entry of its own in the cache is left out.
    """
    present = []
    for parent in self._cache[version_id][4]:
        if parent in self._cache:
            present.append(parent)
    return present

1327

1328

def get_parents_with_ghosts(self, version_id):
    """Return parents of specified version with ghosts.

    This is field 4 of the cached entry, returned as stored.
    """
    entry = self._cache[version_id]
    return entry[4]

1331

1332

def check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    :raises RevisionNotPresent: naming one of the versions that has no
        entry in the cache.
    """
    outstanding = set(version_ids)
    found = [version_id for version_id in outstanding
             if version_id in self._cache]
    for version_id in found:
        outstanding.remove(version_id)
    if outstanding:
        # NOTE(review): reads self.filename while nearby code in this file
        # uses self._filename -- confirm that attribute exists on this class.
        raise RevisionNotPresent(list(outstanding)[0], self.filename)

1340

1341

1342

class _KnitData(_KnitComponentFile):

1343

"""Contents of the knit data file"""

1344

1345

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Set up the data file wrapper, optionally creating the file.

    With create set, either the file is written empty straight away, or,
    when delay_create is also set, creation is postponed by recording it
    in self._need_to_create.  The remaining arguments are forwarded to
    _KnitComponentFile.__init__.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._checked = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}
    self._do_cache = False
    if not create:
        return
    if delay_create:
        self._need_to_create = create
    else:
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

1365

1366

def enable_cache(self):
    """Enable caching of reads.

    Only the _do_cache flag is set; the cache contents are left alone.
    """
    self._do_cache = True

1369

1370

def clear_cache(self):
    """Clear the record cache.

    Caching is switched off as well, and the cache is replaced with a new
    empty dictionary.
    """
    self._cache = {}
    self._do_cache = False

1374

1375

def _open_file(self):
    """Return what transport.get() gives for the data file, or None if it does not exist."""
    try:
        handle = self._transport.get(self._filename)
    except NoSuchFile:
        return None
    return handle

1381

1382

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream holding a "version <id> <count> <digest>"
    header line, the content lines, and an "end <id>" trailer line.

    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    sio = StringIO()
    gz = GzipFile(None, mode='wb', fileobj=sio)

    utf8_id = cache_utf8.encode(version_id)
    header = "version %s %d %s\n" % (utf8_id, len(lines), digest)
    trailer = "end %s\n" % utf8_id
    gz.writelines(chain([header], lines, [trailer]))
    gz.close()
    # The write position after closing the gzip stream is the total size.
    length = sio.tell()

    sio.seek(0)
    return length, sio

1402

1403

def add_raw_record(self, raw_data):
    """Append a prepared record to the data file.

    When the file still has to be created it is written from scratch with
    raw_data as its content, so the record starts at offset 0.

    :return: the offset in the data file raw_data was written.
    """
    assert isinstance(raw_data, str), 'data must be plain bytes'
    if self._need_to_create:
        self._transport.put_bytes_non_atomic(self._filename, raw_data,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
        self._need_to_create = False
        return 0
    return self._transport.append_bytes(self._filename, raw_data)

1418

1419

def add_record(self, version_id, digest, lines):
    """Write new text record to disk.

    The raw bytes are also stored in the read cache when caching is
    enabled.

    :return: (start_pos, size) -- the position in the file where the
        record was written, and its size.
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    # write to disk
    if self._need_to_create:
        # A file created here contains only this record.
        self._transport.put_file_non_atomic(self._filename, sio,
                           create_parent_dir=self._create_parent_dir,
                           mode=self._file_mode,
                           dir_mode=self._dir_mode)
        self._need_to_create = False
        start_pos = 0
    else:
        start_pos = self._transport.append_file(self._filename, sio)
    if self._do_cache:
        self._cache[version_id] = sio.getvalue()
    return start_pos, size

1436

1437

def _parse_record_header(self, version_id, raw_data):
    """Parse a record header for consistency.

    The first decompressed line must split into exactly four fields, and
    the second field must decode to version_id.

    :return: the header and the decompressor stream.
             as (stream, header_record)
    :raises KnitCorrupt: if either check fails.
    """
    df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
    header = df.readline().split()
    if len(header) != 4:
        raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
    found_id = header[1]
    if cache_utf8.decode(found_id) != version_id:
        raise KnitCorrupt(self._filename,
                          'unexpected version, wanted %r, got %r' % (
                            version_id, found_id))
    return df, header

1452

1453

def _parse_record(self, version_id, data):
    """Decompress one record and return (content lines, digest).

    The header is checked by _parse_record_header(); the final line must
    be the "end <version_id>" trailer and the number of content lines must
    equal the count given in the header.

    :raises KnitCorrupt: if the trailer line is not the expected one.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df, rec = self._parse_record_header(version_id, data)
    record_contents = df.readlines()
    # The trailer is the last line; what is left is the content.
    end_line = record_contents.pop()
    assert len(record_contents) == int(rec[2])
    expected_end = 'end %s\n' % cache_utf8.encode(version_id)
    if end_line != expected_end:
        raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
                    % (end_line, version_id))
    df.close()
    digest = rec[3]
    return record_contents, digest

1467

1468

def read_records_iter_raw(self, records):
    """Read text records from data file and yield raw data.

    This unpacks enough of the text record to validate the id is
    as expected but thats all.

    :param records: a sequence of (version_id, pos, size) entries.
    :return: yields (version_id, raw_data) in the order requested.
    """
    # setup an iterator of the external records:
    # uses readv so nice and fast we hope.
    if len(records):
        # grab the disk data needed.
        if self._cache:
            # Don't check _cache if it is empty
            needed_offsets = []
            for version_id, pos, size in records:
                if version_id not in self._cache:
                    needed_offsets.append((pos, size))
        else:
            needed_offsets = [(pos, size) for version_id, pos, size
                                           in records]

        raw_records = self._transport.readv(self._filename, needed_offsets)

    for version_id, pos, size in records:
        if version_id not in self._cache:
            pos, data = raw_records.next()
            if self._do_cache:
                self._cache[version_id] = data

            # validate the header
            df, rec = self._parse_record_header(version_id, data)
            df.close()
        else:
            # This data has already been validated
            data = self._cache[version_id]
        yield version_id, data

1503

1504

def read_records_iter(self, records):
    """Read text records from data file and yield result.

    The result will be returned in whatever is the fastest to read.
    Not by the order requested. Also, multiple requests for the same
    record will only yield 1 response.
    :param records: A list of (version_id, pos, len) entries
    :return: Yields (version_id, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    by_position = operator.itemgetter(1)
    if self._cache:
        # Skip records we have alread seen
        already_yielded = set()
        to_read = set()
        for record in records:
            version_id = record[0]
            if version_id not in self._cache:
                to_read.add(record)
                continue
            if version_id in already_yielded:
                continue
            already_yielded.add(version_id)
            content, digest = self._parse_record(version_id,
                                                 self._cache[version_id])
            yield (version_id, content, digest)
        needed_records = sorted(to_read, key=by_position)
    else:
        needed_records = sorted(set(records), key=by_position)

    if not needed_records:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    offsets = [(pos, size) for version_id, pos, size in needed_records]
    readv_response = self._transport.readv(self._filename, offsets)

    for (version_id, pos, size), (pos, data) in \
            izip(iter(needed_records), readv_response):
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

1549

1550

def read_records(self, records):
    """Read records into a dictionary.

    :return: a dict mapping each record id yielded by read_records_iter()
        to its (content, digest) pair.
    """
    return dict((record_id, (content, digest))
                for record_id, content, digest
                in self.read_records_iter(records))

1557

1558

1559

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.  """
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Raw records are copied from source to target without being
        re-parsed; versions whose parent lists differ between the two
        knits are then passed to target.fix_parents().

        :return: the number of versions copied.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        source = self.source
        target = self.target
        # The pb argument is replaced here; a nested progress bar is always
        # created and finished by this method.
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            this_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(
                this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents_with_ghosts(version))
                if n1 == n2:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent not in n1:
                        parent_ancestors = source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = [i for i in full_list
                            if (not target.has_version(i)
                                and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            source_index = source._index
            for version_id in version_list:
                options = source_index.get_options(version_id)
                parents = source_index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (target.has_version(parent) or
                            parent in copy_set or
                            not source.has_version(parent))
                data_pos, data_size = source_index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            raw_stream = source._data.read_records_iter_raw(copy_queue_records)
            for (version_id, raw_data), \
                    (version_id2, options, parents) in \
                    izip(raw_stream, copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append(
                    (version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                extra_parents = list(n2.difference(n1))
                new_parents = (target.get_parents_with_ghosts(version)
                               + extra_parents)
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1671

1672

1673

# Register InterKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

1674

1675

1676

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Each needed version is re-added to the target with add_lines();
        versions for which the source lists parents the target lacks are
        then passed to target.fix_parents().

        :return: the number of versions converted.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        source = self.source
        target = self.target
        # The pb argument is replaced here; a nested progress bar is always
        # created and finished by this method.
        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(source.get_ancestry(version_ids))
            this_versions = set(target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(
                this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if not n2.difference(n1):
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the n2 revisions.
                for parent in n2:
                    if parent not in n1:
                        parent_ancestors = source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                new_parents = n2.difference(n1)
                needed_versions.update(new_parents.difference(this_versions))
                mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(source.get_graph())

            version_list = [i for i in full_list
                            if (not target.has_version(i)
                                and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (target.has_version(parent))
                target.add_lines(
                    version_id, parents, source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(target.get_parents_with_ghosts(version))
                n2 = set(source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                extra_parents = list(n2.difference(n1))
                new_parents = (target.get_parents_with_ghosts(version)
                               + extra_parents)
                target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1764

1765

1766

# Register WeaveToKnit as an optimiser with InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

1767

1768

1769

class KnitSequenceMatcher(difflib.SequenceMatcher):

1770

"""Knit tuned sequence matcher.

1771

1772

This is based on profiling of difflib which indicated some improvements

1773

for our usage pattern.

1774

"""

1775

1776

def find_longest_match(self, alo, ahi, blo, bhi):

1777

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

1778

1779

If isjunk is not defined:

1780

1781

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

1782

alo <= i <= i+k <= ahi

1783

blo <= j <= j+k <= bhi

1784

and for all (i',j',k') meeting those conditions,

1785

k >= k'

1786

i <= i'

1787

and if i == i', j <= j'

1788

1789

In other words, of all maximal matching blocks, return one that

1790

starts earliest in a, and of all those maximal matching blocks that

1791

start earliest in a, return the one that starts earliest in b.

1792

1793

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

1794

>>> s.find_longest_match(0, 5, 0, 9)

1795

(0, 4, 5)

1796

1797

If isjunk is defined, first the longest matching block is

1798

determined as above, but with the additional restriction that no

1799

junk element appears in the block. Then that block is extended as

1800

far as possible by matching (only) junk elements on both sides. So

1801

the resulting block never matches on junk except as identical junk

1802

happens to be adjacent to an "interesting" match.

1803

1804

Here's the same example as before, but considering blanks to be

1805

junk. That prevents " abcd" from matching the " abcd" at the tail

1806

end of the second sequence directly. Instead only the "abcd" can

1807

match, and matches the leftmost "abcd" in the second sequence:

1808

1809

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

1810

>>> s.find_longest_match(0, 5, 0, 9)

1811

(1, 0, 4)

1812

1813

If no blocks match, return (alo, blo, 0).

1814

1815

>>> s = SequenceMatcher(None, "ab", "c")

1816

>>> s.find_longest_match(0, 2, 0, 1)

1817

(0, 0, 0)

1818

"""

1819

1820

# CAUTION: stripping common prefix or suffix would be incorrect.

1821

# E.g.,

1822

# ab

1823

# acab

1824

# Longest matching block is "ab", but if common prefix is

1825

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

1826

# strip, so ends up claiming that ab is changed to acab by

1827

# inserting "ca" in the middle. That's minimal but unintuitive:

1828

# "it's obvious" that someone inserted "ac" at the front.

1829

# Windiff ends up at the same place as diff, but by pairing up

1830

# the unique 'b's and then matching the first two 'a's.

1831

1832

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

1833

besti, bestj, bestsize = alo, blo, 0

1834

# find longest junk-free match

1835

# during an iteration of the loop, j2len[j] = length of longest

1836

# junk-free match ending with a[i-1] and b[j]

1837

j2len = {}

1838

# nothing = []

1839

b2jget = b2j.get

1840

for i in xrange(alo, ahi):

1841

# look at all instances of a[i] in b; note that because

1842

# b2j has no junk keys, the loop is skipped if a[i] is junk

1843

j2lenget = j2len.get

1844

newj2len = {}

3046

else:

3047

return 'fulltext'

3048

3049

def _get_node(self, key):

3050

try:

3051

return list(self._get_entries([key]))[0]

3052

except IndexError:

3053

raise RevisionNotPresent(key, self)

3054

3055

def get_options(self, key):
    """Return the list of option strings stored for key.

    The first element is whatever self._get_method() reports for the
    node; 'no-eol' follows when the node value starts with 'N'.

    e.g. ['foo', 'bar']
    """
    node = self._get_node(key)
    method = self._get_method(node)
    if node[2][0] != 'N':
        return [method]
    return [method, 'no-eol']

3065

3066

def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()"""
    # NOTE(review): the literal 0 presumably selects the parents
    # reference list -- confirm against the graph index API.
    graph_index = self._graph_index
    return graph_index.find_ancestry(keys, 0)

3069

3070

def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping. When this index does not track parents
        (self._parents is false) every present key maps to None.
    """
    self._check_read()
    nodes = self._get_entries(keys)
    if self._parents:
        # node[1] is the key, node[3][0] its first reference list.
        return dict((node[1], node[3][0]) for node in nodes)
    return dict((node[1], None) for node in nodes)

3087

3088

def get_position(self, key):
    """Return details needed to access the version.

    :param key: The key whose record location is wanted.
    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))

3096

3097

has_key = _mod_index._has_key_from_parent_map

3098

3099

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A list with the key (element 1) of every entry yielded by
        the graph index.
    """
    self._check_read()
    all_keys = []
    for node in self._graph_index.iter_all_entries():
        all_keys.append(node[1])
    return all_keys

3106

3107

missing_keys = _mod_index._missing_keys_from_parent_map

3108

3109

def _node_to_position(self, node):

3110

"""Convert an index value to position details."""

3111

bits = node[2][1:].split(' ')

3112

return node[0], int(bits[0]), int(bits[1])

3113

3114

def _sort_keys_by_io(self, keys, positions):

3115

"""Figure out an optimal order to read the records for the given keys.

3116

3117

Sort keys, grouped by index and sorted by position.

3118

3119

:param keys: A list of keys whose records we want to read. This will be

3120

sorted 'in-place'.

3121

:param positions: A dict, such as the one returned by

3122

_get_components_positions()

3123

:return: None

3124

"""

3125

def get_index_memo(key):

3126

# index_memo is at offset [1]. It is made up of (GraphIndex,

3127

# position, size). GI is an object, which will be unique for each

3128

# pack file. This causes us to group by pack file, then sort by

3129

# position. Size doesn't matter, but it isn't worth breaking up the

3130

# tuple.

3131

return positions[key][1]

3132

return keys.sort(key=get_index_memo)

3133

3134

_get_total_build_size = _get_total_build_size

3135

3136

3137

class _KnitKeyAccess(object):

3138

"""Access to records in .knit files."""

3139

3140

def __init__(self, transport, mapper):

3141

"""Create a _KnitKeyAccess with transport and mapper.

3142

3143

:param transport: The transport the access object is rooted at.

3144

:param mapper: The mapper used to map keys to .knit files.

3145

"""

3146

self._transport = transport

3147

self._mapper = mapper

3148

3149

def add_raw_records(self, key_sizes, raw_data):

3150

"""Add raw knit bytes to a storage area.

3151

3152

The data is spooled to the container writer in one bytes-record per

3153

raw data item.

3154

3155

:param sizes: An iterable of tuples containing the key and size of each

3156

raw data segment.

3157

:param raw_data: A bytestring containing the data.

3158

:return: A list of memos to retrieve the record later. Each memo is an

3159

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

3160

length), where the key is the record key.

3161

"""

3162

if type(raw_data) is not str:

3163

raise AssertionError(

3164

'data must be plain bytes was %s' % type(raw_data))

3165

result = []

3166

offset = 0

3167

# TODO: This can be tuned for writing to sftp and other servers where

3168

# append() is relatively expensive by grouping the writes to each key

3169

# prefix.

3170

for key, size in key_sizes:

3171

path = self._mapper.map(key)

3172

try:

3173

base = self._transport.append_bytes(path + '.knit',

3174

raw_data[offset:offset+size])

3175

except errors.NoSuchFile:

3176

self._transport.mkdir(osutils.dirname(path))

3177

base = self._transport.append_bytes(path + '.knit',

3178

raw_data[offset:offset+size])

3179

# if base == 0:

3180

# chmod.

3181

offset += size

3182

result.append((key, base, size))

3183

return result

3184

3185

def flush(self):

3186

"""Flush pending writes on this access object.

3187

3188

For .knit files this is a no-op.

3189

"""

3190

pass

3191

3192

def get_raw_records(self, memos_for_retrieval):

3193

"""Get the raw bytes for a records.

3194

3195

:param memos_for_retrieval: An iterable containing the access memo for

3196

retrieving the bytes.

3197

:return: An iterator over the bytes of the records.

3198

"""

3199

# first pass, group into same-index request to minimise readv's issued.

3200

request_lists = []

3201

current_prefix = None

3202

for (key, offset, length) in memos_for_retrieval:

3203

if current_prefix == key[:-1]:

3204

current_list.append((offset, length))

3205

else:

3206

if current_prefix is not None:

3207

request_lists.append((current_prefix, current_list))

3208

current_prefix = key[:-1]

3209

current_list = [(offset, length)]

3210

# handle the last entry

3211

if current_prefix is not None:

3212

request_lists.append((current_prefix, current_list))

3213

for prefix, read_vector in request_lists:

3214

path = self._mapper.map(prefix) + '.knit'

3215

for pos, data in self._transport.readv(path, read_vector):

3216

yield data

3217

3218

3219

class _DirectPackAccess(object):

3220

"""Access to data in one or more packs with less translation."""

3221

3222

def __init__(self, index_to_packs, reload_func=None, flush_func=None):

3223

"""Create a _DirectPackAccess object.

3224

3225

:param index_to_packs: A dict mapping index objects to the transport

3226

and file names for obtaining data.

3227

:param reload_func: A function to call if we determine that the pack

3228

files have moved and we need to reload our caches. See

3229

bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.

3230

"""

3231

self._container_writer = None

3232

self._write_index = None

3233

self._indices = index_to_packs

3234

self._reload_func = reload_func

3235

self._flush_func = flush_func

3236

3237

def add_raw_records(self, key_sizes, raw_data):
    """Add raw knit bytes to a storage area.

    The data is spooled to the container writer in one bytes-record per
    raw data item.

    :param key_sizes: An iterable of tuples containing the key and size of
        each raw data segment.
    :param raw_data: A bytestring containing the data.
    :return: A list of memos to retrieve the record later. Each memo is an
        opaque index memo. For _DirectPackAccess the memo is (index, pos,
        length), where the index field is the write_index object supplied
        to the PackAccess object.
    """
    if type(raw_data) is not str:
        raise AssertionError(
            'data must be plain bytes was %s' % type(raw_data))
    memos = []
    start = 0
    for key, size in key_sizes:
        end = start + size
        # Records are written without names (empty name list).
        p_offset, p_length = self._container_writer.add_bytes_record(
            raw_data[start:end], [])
        memos.append((self._write_index, p_offset, p_length))
        start = end
    return memos

3262

3263

def flush(self):
    """Flush pending writes on this access object.

    This will flush any buffered writes to a NewPack, by calling the
    flush function stored on this object when there is one.
    """
    flush_func = self._flush_func
    if flush_func is None:
        return
    flush_func()

1845

3270

1846

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1847

# following improvement

1848

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1849

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1850

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

1851

# to

1852

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

1853

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

1854

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

3271

def get_raw_records(self, memos_for_retrieval):

3272

"""Get the raw bytes for a records.

1855

3273

3274

:param memos_for_retrieval: An iterable containing the (index, pos,

3275

length) memo for retrieving the bytes. The Pack access method

3276

looks up the pack to use for a given record in its index_to_pack

3277

map.

3278

:return: An iterator over the bytes of the records.

3279

"""

3280

# NOTE(review): this text was captured from a rendered diff. Bare integers
# are viewer line numbers, and the statements next to the 18xx numbers
# (`js = b2j[a[i]]` and `pass`) are fragments of find_longest_match, not
# part of this method.
# Consecutive memos that share an index object are merged into a single
# (index, [(offset, length), ...]) request; memos are not reordered.
# first pass, group into same-index requests

3281

request_lists = []

3282

current_index = None

3283

for (index, offset, length) in memos_for_retrieval:

3284

if current_index == index:

3285

current_list.append((offset, length))

3286

else:

3287

if current_index is not None:

3288

request_lists.append((current_index, current_list))

3289

current_index = index

3290

current_list = [(offset, length)]

3291

# handle the last entry

3292

if current_index is not None:

3293

request_lists.append((current_index, current_list))

3294

# second pass: resolve each index to its (transport, path) pair from
# self._indices and read the grouped ranges from that pack.
for index, offsets in request_lists:

1856

3295

try:

1857

js = b2j[a[i]]

3296

transport, path = self._indices[index]

1858

3297

except KeyError:

1859

pass

3298

# A KeyError here indicates that someone has triggered an index

3299

# reload, and this index has gone missing, we need to start

3300

# over.

3301

if self._reload_func is None:

3302

# If we don't have a _reload_func there is nothing that can

3303

# be done

3304

raise

3305

raise errors.RetryWithNewPacks(index,

3306

reload_occurred=True,

3307

exc_info=sys.exc_info())

3308

try:

3309

reader = pack.make_readv_reader(transport, path, offsets)

3310

for names, read_func in reader.iter_records():

3311

# NOTE(review): read_func(None) presumably returns the whole record's
# bytes -- confirm against the pack reader API.
yield read_func(None)

3312

except errors.NoSuchFile:

3313

# A NoSuchFile error indicates that a pack file has gone

3314

# missing on disk, we need to trigger a reload, and start over.

3315

if self._reload_func is None:

3316

raise

3317

raise errors.RetryWithNewPacks(transport.abspath(path),

3318

reload_occurred=False,

3319

exc_info=sys.exc_info())

3320

3321

def set_writer(self, writer, index, transport_packname):
    """Set a writer to use for adding data.

    :param writer: The container writer that add_raw_records will use.
    :param index: The index object recorded in the memos of newly added
        records. When it is not None it is also registered in the
        index-to-pack map.
    :param transport_packname: The value stored in the index-to-pack map
        for index.
    """
    register_pack = index is not None
    if register_pack:
        self._indices[index] = transport_packname
    self._container_writer, self._write_index = writer, index

3327

3328

def reload_or_raise(self, retry_exc):

3329

"""Try calling the reload function, or re-raise the original exception.

3330

3331

This should be called after _DirectPackAccess raises a

3332

RetryWithNewPacks exception. This function will handle the common logic

3333

of determining when the error is fatal versus being temporary.

3334

It will also make sure that the original exception is raised, rather

3335

than the RetryWithNewPacks exception.

3336

3337

If this function returns, then the calling function should retry

3338

whatever operation was being performed. Otherwise an exception will

3339

be raised.

3340

3341

:param retry_exc: A RetryWithNewPacks exception.

3342

"""

3343

is_error = False

3344

if self._reload_func is None:

3345

is_error = True

3346

elif not self._reload_func():

3347

# The reload claimed that nothing changed

3348

if not retry_exc.reload_occurred:

3349

# If there wasn't an earlier reload, then we really were

3350

# expecting to find changes. We didn't find them, so this is a

3351

# hard error

3352

is_error = True

3353

if is_error:

3354

exc_class, exc_value, exc_traceback = retry_exc.exc_info

3355

raise exc_class, exc_value, exc_traceback

3356

3357

3358

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    :param knit: The knit handed to a new _KnitAnnotator.
    :param revision_id: The revision passed to annotate_flat().
    :return: An iterator over the result of annotate_flat().
    """
    flat_annotations = _KnitAnnotator(knit).annotate_flat(revision_id)
    return iter(flat_annotations)

3367

3368

3369

class _KnitAnnotator(annotate.Annotator):

3370

"""Build up the annotations for a text."""

3371

3372

def __init__(self, vf):
    """Set up the per-annotation caches on top of annotate.Annotator.

    :param vf: The versioned file object passed through to
        annotate.Annotator.__init__.
    """
    annotate.Annotator.__init__(self, vf)

    # TODO: handle Nodes which cannot be extracted
    # self._ghosts = set()

    # Build details gathered from the index, keyed by record key.
    self._all_build_details = {}

    # KnitContent objects
    self._content_objects = {}
    # The number of children that depend on this fulltext content object
    self._num_compression_children = {}

    # Map from (key, parent_key) => matching_blocks, should be 'use once'
    self._matching_blocks = {}

    # Delta records that need their compression parent before they can be
    # expanded
    self._pending_deltas = {}
    # Fulltext records that are waiting for their parents fulltexts before
    # they can be yielded for annotation
    self._pending_annotation = {}

3393

3394

def _get_build_graph(self, key):

3395

"""Get the graphs for building texts and annotations.

3396

3397

The data you need for creating a full text may be different than the

3398

data you need to annotate that text. (At a minimum, you need both

3399

parents to create an annotation, but only need 1 parent to generate the

3400

fulltext.)

3401

3402

:return: A list of (key, index_memo) records, suitable for

3403

passing to read_records_iter to start reading in the raw data from

3404

the pack file.

3405

"""

3406

pending = set([key])

3407

records = []

3408

ann_keys = set()

3409

self._num_needed_children[key] = 1

3410

while pending:

3411

# get all pending nodes

3412

this_iteration = pending

3413

build_details = self._vf._index.get_build_details(this_iteration)

3414

self._all_build_details.update(build_details)

3415

# new_nodes = self._vf._index._get_entries(this_iteration)

3416

pending = set()

3417

for key, details in build_details.iteritems():

3418

(index_memo, compression_parent, parent_keys,

3419

record_details) = details

3420

self._parent_map[key] = parent_keys

3421

self._heads_provider = None

3422

records.append((key, index_memo))

3423

# Do we actually need to check _annotated_lines?

3424

pending.update([p for p in parent_keys

3425

if p not in self._all_build_details])

3426

if parent_keys:

3427

for parent_key in parent_keys:

3428

if parent_key in self._num_needed_children:

3429

self._num_needed_children[parent_key] += 1

3430

else:

3431

self._num_needed_children[parent_key] = 1

3432

if compression_parent:

3433

if compression_parent in self._num_compression_children:

3434

self._num_compression_children[compression_parent] += 1

3435

else:

3436

self._num_compression_children[compression_parent] = 1

3437

3438

missing_versions = this_iteration.difference(build_details.keys())

3439

if missing_versions:

3440

for key in missing_versions:

3441

if key in self._parent_map and key in self._text_cache:

3442

# We already have this text ready, we just need to

3443

# yield it later so we get it annotated

3444

ann_keys.add(key)

3445

parent_keys = self._parent_map[key]

3446

for parent_key in parent_keys:

3447

if parent_key in self._num_needed_children:

3448

self._num_needed_children[parent_key] += 1

3449

else:

3450

self._num_needed_children[parent_key] = 1

3451

pending.update([p for p in parent_keys

3452

if p not in self._all_build_details])

3453

else:

3454

raise errors.RevisionNotPresent(key, self._vf)

3455

# Generally we will want to read the records in reverse order, because

3456

# we find the parent nodes after the children

3457

records.reverse()

3458

return records, ann_keys

3459

3460

def _get_needed_texts(self, key, pb=None):

3461

# if True or len(self._vf._immediate_fallback_vfs) > 0:

3462

if len(self._vf._immediate_fallback_vfs) > 0:

3463

# If we have fallbacks, go to the generic path

3464

for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):

3465

yield v

3466

return

3467

while True:

3468

try:

3469

records, ann_keys = self._get_build_graph(key)

3470

for idx, (sub_key, text, num_lines) in enumerate(

3471

self._extract_texts(records)):

3472

if pb is not None:

3473

pb.update('annotating', idx, len(records))

3474

yield sub_key, text, num_lines

3475

for sub_key in ann_keys:

3476

text = self._text_cache[sub_key]

3477

num_lines = len(text) # bad assumption

3478

yield sub_key, text, num_lines

3479

return

3480

except errors.RetryWithNewPacks, e:

3481

self._vf._access.reload_or_raise(e)

3482

# The cached build_details are no longer valid

3483

self._all_build_details.clear()

3484

3485

def _cache_delta_blocks(self, key, compression_parent, delta, lines):
    """Cache the matching blocks between key and its compression parent.

    The blocks are computed by KnitContent.get_line_delta_blocks from the
    delta, the parent's cached lines and this text's lines, and stored in
    self._matching_blocks under (key, compression_parent).
    """
    block_key = (key, compression_parent)
    parent_lines = self._text_cache[compression_parent]
    self._matching_blocks[block_key] = list(
        KnitContent.get_line_delta_blocks(delta, parent_lines, lines))

3489

3490

def _expand_record(self, key, parent_keys, compression_parent, record,

3491

record_details):

3492

# Expand one raw record into text lines and cache them in
# self._text_cache[key].
# Returns None, after queueing the record in self._pending_deltas, when the
# record is a delta whose compression parent has no content object yet;
# otherwise returns the lines produced by content.text().
# NOTE(review): this text was captured from a rendered diff. Bare integers
# are viewer line numbers, and the statements numbered 1861-1900 that follow
# the first `else:` below are the tail of find_longest_match, not part of
# this method.
delta = None

3493

if compression_parent:

3494

if compression_parent not in self._content_objects:

3495

# Waiting for the parent

3496

self._pending_deltas.setdefault(compression_parent, []).append(

3497

(key, parent_keys, record, record_details))

3498

return None

3499

# We have the basis parent, so expand the delta

3500

num = self._num_compression_children[compression_parent]

3501

# One fewer delta child still needs the parent's content object; when the
# count reaches zero the parent's content object and counter are dropped.
num -= 1

3502

if num == 0:

3503

base_content = self._content_objects.pop(compression_parent)

3504

self._num_compression_children.pop(compression_parent)

1860

3505

else:

1861

for j in js:

1862

# a[i] matches b[j]

1863

if j >= blo:

1864

if j >= bhi:

1865

break

1866

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

1867

if k > bestsize:

1868

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

1869

j2len = newj2len

1870

1871

# Extend the best by non-junk elements on each end. In particular,

1872

# "popular" non-junk elements aren't in b2j, which greatly speeds

1873

# the inner loop above, but also means "the best" match so far

1874

# doesn't contain any junk *or* popular non-junk elements.

1875

while besti > alo and bestj > blo and \

1876

not isbjunk(b[bestj-1]) and \

1877

a[besti-1] == b[bestj-1]:

1878

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1879

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1880

not isbjunk(b[bestj+bestsize]) and \

1881

a[besti+bestsize] == b[bestj+bestsize]:

1882

bestsize += 1

1883

1884

# Now that we have a wholly interesting match (albeit possibly

1885

# empty!), we may as well suck up the matching junk on each

1886

# side of it too. Can't think of a good reason not to, and it

1887

# saves post-processing the (possibly considerable) expense of

1888

# figuring out what to do with it. In the case of an empty

1889

# interesting match, this is clearly the right thing to do,

1890

# because no other kind of match is possible in the regions.

1891

while besti > alo and bestj > blo and \

1892

isbjunk(b[bestj-1]) and \

1893

a[besti-1] == b[bestj-1]:

1894

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1895

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1896

isbjunk(b[bestj+bestsize]) and \

1897

a[besti+bestsize] == b[bestj+bestsize]:

1898

bestsize = bestsize + 1

1899

1900

return besti, bestj, bestsize

1901

3506

self._num_compression_children[compression_parent] = num

3507

base_content = self._content_objects[compression_parent]

3508

# It is tempting to want to copy_base_content=False for the last

3509

# child object. However, whenever noeol=False,

3510

# self._text_cache[parent_key] is content._lines. So mutating it

3511

# gives very bad results.

3512

# The alternative is to copy the lines into text cache, but then we

3513

# are copying anyway, so just do it here.

3514

content, delta = self._vf._factory.parse_record(

3515

key, record, record_details, base_content,

3516

copy_base_content=True)

3517

else:

3518

# Fulltext record

3519

content, _ = self._vf._factory.parse_record(

3520

key, record, record_details, None)

3521

# Keep the content object only while some delta child still needs it as
# its compression basis.
if self._num_compression_children.get(key, 0) > 0:

3522

self._content_objects[key] = content

3523

lines = content.text()

3524

self._text_cache[key] = lines

3525

# Only delta records produce a delta; its matching blocks are cached for
# later reuse.
if delta is not None:

3526

self._cache_delta_blocks(key, compression_parent, delta, lines)

3527

return lines

3528

3529

def _get_parent_annotations_and_matches(self, key, text, parent_key):

3530

"""Get the list of annotations for the parent, and the matching lines.

3531

3532

:param text: The opaque value given by _get_needed_texts

3533

:param parent_key: The key for the parent text

3534

:return: (parent_annotations, matching_blocks)

3535

parent_annotations is a list as long as the number of lines in

3536

parent

3537

matching_blocks is a list of (parent_idx, text_idx, len) tuples

3538

indicating which lines match between the two texts

3539

"""

3540

block_key = (key, parent_key)

3541

if block_key in self._matching_blocks:

3542

blocks = self._matching_blocks.pop(block_key)

3543

parent_annotations = self._annotations_cache[parent_key]

3544

return parent_annotations, blocks

3545

return annotate.Annotator._get_parent_annotations_and_matches(self,

3546

key, text, parent_key)

3547

3548

def _process_pending(self, key):

3549

"""The content for 'key' was just processed.

3550

3551

Determine if there is any more pending work to be processed.

3552

"""

3553

to_return = []

3554

if key in self._pending_deltas:

3555

compression_parent = key

3556

children = self._pending_deltas.pop(key)

3557

for child_key, parent_keys, record, record_details in children:

3558

lines = self._expand_record(child_key, parent_keys,

3559

compression_parent,

3560

record, record_details)

3561

if self._check_ready_for_annotations(child_key, parent_keys):

3562

to_return.append(child_key)

3563

# Also check any children that are waiting for this parent to be

3564

# annotation ready

3565

if key in self._pending_annotation:

3566

children = self._pending_annotation.pop(key)

3567

to_return.extend([c for c, p_keys in children

3568

if self._check_ready_for_annotations(c, p_keys)])

3569

return to_return

3570

3571

def _check_ready_for_annotations(self, key, parent_keys):

3572

"""return true if this text is ready to be yielded.

3573

3574

Otherwise, this will return False, and queue the text into

3575

self._pending_annotation

3576

"""

3577

for parent_key in parent_keys:

3578

if parent_key not in self._annotations_cache:

3579

# still waiting on at least one parent text, so queue it up

3580

# Note that if there are multiple parents, we need to wait

3581

# for all of them.

3582

self._pending_annotation.setdefault(parent_key,

3583

[]).append((key, parent_keys))

3584

return False

3585

return True

3586

3587

def _extract_texts(self, records):

3588

"""Extract the various texts needed based on records"""

3589

# We iterate in the order read, rather than a strict order requested

3590

# However, process what we can, and put off to the side things that

3591

# still need parents, cleaning them up when those parents are

3592

# processed.

3593

# Basic data flow:

3594

# 1) As 'records' are read, see if we can expand these records into

3595

# Content objects (and thus lines)

3596

# 2) If a given line-delta is waiting on its compression parent, it

3597

# gets queued up into self._pending_deltas, otherwise we expand

3598

# it, and put it into self._text_cache and self._content_objects

3599

# 3) If we expanded the text, we will then check to see if all

3600

# parents have also been processed. If so, this text gets yielded,

3601

# else this record gets set aside into pending_annotation

3602

# 4) Further, if we expanded the text in (2), we will then check to

3603

# see if there are any children in self._pending_deltas waiting to

3604

# also be processed. If so, we go back to (2) for those

3605

# 5) Further again, if we yielded the text, we can then check if that

3606

# 'unlocks' any of the texts in pending_annotations, which should

3607

# then get yielded as well

3608

# Note that both steps 4 and 5 are 'recursive' in that unlocking one

3609

# compression child could unlock yet another, and yielding a fulltext

3610

# will also 'unlock' the children that are waiting on that annotation.

3611

# (Though also, unlocking 1 parent's fulltext, does not unlock a child

3612

# if other parents are also waiting.)

3613

# We want to yield content before expanding child content objects, so

3614

# that we know when we can re-use the content lines, and the annotation

3615

# code can know when it can stop caching fulltexts, as well.

3616

3617

# Children that are missing their compression parent

3618

pending_deltas = {}

3619

for (key, record, digest) in self._vf._read_records_iter(records):

3620

# ghosts?

3621

details = self._all_build_details[key]

3622

(_, compression_parent, parent_keys, record_details) = details

3623

lines = self._expand_record(key, parent_keys, compression_parent,

3624

record, record_details)

3625

if lines is None:

3626

# Pending delta should be queued up

3627

continue

3628

# At this point, we may be able to yield this content, if all

3629

# parents are also finished

3630

yield_this_text = self._check_ready_for_annotations(key,

3631

parent_keys)

3632

if yield_this_text:

3633

# All parents present

3634

yield key, lines, len(lines)

3635

to_process = self._process_pending(key)

3636

while to_process:

3637

this_process = to_process

3638

to_process = []

3639

for key in this_process:

3640

lines = self._text_cache[key]

3641

yield key, lines, len(lines)

3642

to_process.extend(self._process_pending(key))

3643

3644

try:

3645

from bzrlib._knit_load_data_pyx import _load_data_c as _load_data

3646

except ImportError, e:

3647

osutils.failed_to_load_extension(e)

3648

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »