~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2009-08-27 01:34:47 UTC
mfrom: (4650.2.2 bug-393677)
Revision ID: pqm@pqm.ubuntu.com-20090827013447-ndjtt02ad7nfdoiy

(robertc) Pass IncompatibleRepositories error over the smart server.
(Robert Collins)

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/benchmarks/bench_tags.py

bzrlib/bencode.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/clean_tree.py

bzrlib/crash.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/inventory_delta.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/python-compat.h

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/packrepository.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/util/bencode.py

bzrlib/views.py

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/BUILD-NOTES

doc/Makefile

doc/_static

doc/_static/bzr icon 16.png

doc/_static/bzr.ico

doc/_static/en

doc/_static/en/quick-reference

doc/_templates

doc/_templates/index.html

doc/_templates/layout.html

doc/conf.py

doc/contents.txt

doc/developers/apport.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/colocated-branches.txt

doc/developers/content-filtering.txt

doc/developers/cycle.txt

doc/developers/ec2.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/lca_tree_merging.txt

doc/en/migration

doc/en/migration/index.txt

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/index-for-2x.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/shelving_changes.txt

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

doc/index.ru.txt

doc/make.bat

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

tools/check-newsbugs.py

tools/packaging/lp-upload-release

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files removed:
bzrlib/_walkdirs_win32.h

bzrlib/help_topics/en/hooks.txt

bzrlib/tests/test_http_implementations.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/tests/test_bencode.py

doc/developers/performance-contributing.txt

files renamed:
bzrlib/_btree_serializer_c.pyx => bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_c.h => bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_c.pyx => bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/tests/ftp_server.py => bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/inventory_implementations/ => bzrlib/tests/per_inventory/

bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

bzrlib/util/bencode.py => bzrlib/util/_bencode_py.py

doc/en/quick-reference/Makefile => doc/_static/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf => doc/_static/en/quick-reference/bzr-quick-reference.pdf

doc/en/quick-reference/quick-start-summary.png => doc/_static/en/quick-reference/bzr-quick-reference.png

doc/en/quick-reference/quick-start-summary.svg => doc/_static/en/quick-reference/bzr-quick-reference.svg

doc/developers/HACKING.txt => doc/en/developer-guide/HACKING.txt

doc/es/guia-desarrollador/ => doc/es/developer-guide/

doc/es/referencia-rapida/ => doc/es/quick-reference/

doc/es/referencia-rapida/referencia-rapida.svg => doc/es/quick-reference/quick-start-summary.svg

doc/es/notas-version/ => doc/es/release-notes/

doc/es/guia-usuario/ => doc/es/user-guide/

doc/es/referencia/ => doc/es/user-reference/

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

Makefile

NEWS

bzr.ico

bzrlib/__init__.py

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/treeshape.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib/bzr_access

contrib/newinventory.py

contrib/pwclient.full

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/performance-roadmap.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/revision-properties.txt

doc/developers/testing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

doc/es/quick-reference/Makefile

doc/index.es.txt

doc/index.txt

profile_imports.py

setup.py

tools/bzr_epydoc_uid.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/package_mf.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/weavebench.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit versionedfile implementation.

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with a

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

what's in an index:

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from cStringIO import StringIO

from itertools import izip, chain

from itertools import izip

import operator

import os

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

SHA1KnitCorrupt,

)

from bzrlib.osutils import (

100

contains_whitespace,

108

102

adapter_registry,

109

103

ConstantMapper,

110

104

ContentFactory,

111

FulltextContentFactory,

105

ChunkedContentFactory,

106

sort_groupcompress,

112

107

VersionedFile,

113

108

VersionedFiles,

114

109

)

129

124

130

125

DATA_SUFFIX = '.knit'

131

126

INDEX_SUFFIX = '.kndx'

127

_STREAM_MIN_BUFFER_SIZE = 5*1024*1024

132

128

133

129

134

130

class KnitAdapter(object):

136

132

137

133

def __init__(self, basis_vf):

138

134

"""Create an adapter which accesses full texts from basis_vf.

139

135

140

136

:param basis_vf: A versioned file to access basis texts of deltas from.

141

137

May be None for adapters that do not need to access basis texts.

142

138

"""

149

145

class FTAnnotatedToUnannotated(KnitAdapter):

150

146

"""An adapter from FT annotated knits to unannotated ones."""

151

147

152

def get_bytes(self, factory, annotated_compressed_bytes):

148

def get_bytes(self, factory):

149

annotated_compressed_bytes = factory._raw_record

153

150

rec, contents = \

154

151

self._data._parse_record_unchecked(annotated_compressed_bytes)

155

152

content = self._annotate_factory.parse_fulltext(contents, rec[1])

160

157

class DeltaAnnotatedToUnannotated(KnitAdapter):

161

158

"""An adapter for deltas from annotated to unannotated."""

162

159

163

def get_bytes(self, factory, annotated_compressed_bytes):

160

def get_bytes(self, factory):

161

annotated_compressed_bytes = factory._raw_record

164

162

rec, contents = \

165

163

self._data._parse_record_unchecked(annotated_compressed_bytes)

166

164

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

173

171

class FTAnnotatedToFullText(KnitAdapter):

174

172

"""An adapter from FT annotated knits to unannotated ones."""

175

173

176

def get_bytes(self, factory, annotated_compressed_bytes):

174

def get_bytes(self, factory):

175

annotated_compressed_bytes = factory._raw_record

177

176

rec, contents = \

178

177

self._data._parse_record_unchecked(annotated_compressed_bytes)

179

178

content, delta = self._annotate_factory.parse_record(factory.key[-1],

184

183

class DeltaAnnotatedToFullText(KnitAdapter):

185

184

"""An adapter for deltas from annotated to unannotated."""

186

185

187

def get_bytes(self, factory, annotated_compressed_bytes):

186

def get_bytes(self, factory):

187

annotated_compressed_bytes = factory._raw_record

188

rec, contents = \

189

self._data._parse_record_unchecked(annotated_compressed_bytes)

190

delta = self._annotate_factory.parse_line_delta(contents, rec[1],

194

[compression_parent], 'unordered', True).next()

195

if basis_entry.storage_kind == 'absent':

196

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

197

basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))

197

basis_chunks = basis_entry.get_bytes_as('chunked')

198

basis_lines = osutils.chunks_to_lines(basis_chunks)

198

199

# Manually apply the delta because we have one annotated content and

199

200

# one plain.

200

201

basis_content = PlainKnitContent(basis_lines, compression_parent)

206

207

class FTPlainToFullText(KnitAdapter):

207

208

"""An adapter from FT plain knits to unannotated ones."""

208

209

def get_bytes(self, factory, compressed_bytes):

210

def get_bytes(self, factory):

211

compressed_bytes = factory._raw_record

210

212

rec, contents = \

211

213

self._data._parse_record_unchecked(compressed_bytes)

212

214

content, delta = self._plain_factory.parse_record(factory.key[-1],

217

219

class DeltaPlainToFullText(KnitAdapter):

218

220

"""An adapter for deltas from annotated to unannotated."""

219

221

220

def get_bytes(self, factory, compressed_bytes):

222

def get_bytes(self, factory):

223

compressed_bytes = factory._raw_record

221

224

rec, contents = \

222

225

self._data._parse_record_unchecked(compressed_bytes)

223

226

delta = self._plain_factory.parse_line_delta(contents, rec[1])

227

230

[compression_parent], 'unordered', True).next()

228

231

if basis_entry.storage_kind == 'absent':

229

232

raise errors.RevisionNotPresent(compression_parent, self._basis_vf)

230

basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))

233

basis_chunks = basis_entry.get_bytes_as('chunked')

234

basis_lines = osutils.chunks_to_lines(basis_chunks)

231

235

basis_content = PlainKnitContent(basis_lines, compression_parent)

232

236

# Manually apply the delta because we have one annotated content and

233

237

# one plain.

238

242

239

243

class KnitContentFactory(ContentFactory):

240

244

"""Content factory for streaming from knits.

241

245

242

246

:seealso ContentFactory:

243

247

"""

244

248

245

249

def __init__(self, key, parents, build_details, sha1, raw_record,

246

annotated, knit=None):

250

annotated, knit=None, network_bytes=None):

247

251

"""Create a KnitContentFactory for key.

248

252

249

253

:param key: The key.

250

254

:param parents: The parents.

251

255

:param build_details: The build details as returned from

253

257

:param sha1: The sha1 expected from the full text of this object.

254

258

:param raw_record: The bytes of the knit data from disk.

255

259

:param annotated: True if the raw data is annotated.

260

:param network_bytes: None to calculate the network bytes on demand,

261

not-none if they are already known.

256

262

"""

257

263

ContentFactory.__init__(self)

258

264

self.sha1 = sha1

268

274

annotated_kind = ''

269

275

self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)

270

276

self._raw_record = raw_record

277

self._network_bytes = network_bytes

271

278

self._build_details = build_details

272

279

self._knit = knit

273

280

274

def get_bytes_as(self, storage_kind):

275

if storage_kind == self.storage_kind:

276

return self._raw_record

277

if storage_kind == 'fulltext' and self._knit is not None:

278

return self._knit.get_text(self.key[0])

279

else:

280

raise errors.UnavailableRepresentation(self.key, storage_kind,

281

self.storage_kind)

281

def _create_network_bytes(self):

282

"""Create a fully serialised network version for transmission."""

283

# storage_kind, key, parents, Noeol, raw_record

284

key_bytes = '\x00'.join(self.key)

285

if self.parents is None:

286

parent_bytes = 'None:'

287

else:

288

parent_bytes = '\t'.join('\x00'.join(key) for key in self.parents)

289

if self._build_details[1]:

290

noeol = 'N'

291

else:

292

noeol = ' '

293

network_bytes = "%s\n%s\n%s\n%s%s" % (self.storage_kind, key_bytes,

294

parent_bytes, noeol, self._raw_record)

295

self._network_bytes = network_bytes

296

297

def get_bytes_as(self, storage_kind):
    """Return the content of this record as storage_kind.

    :param storage_kind: The representation wanted: this factory's own
        storage kind (which yields the network serialisation), 'chunked'
        or 'fulltext'.
    :raises errors.UnavailableRepresentation: When storage_kind cannot be
        produced from this record.
    """
    native_kind = self.storage_kind
    if storage_kind == native_kind:
        # The network form is only built the first time it is asked for.
        if self._network_bytes is None:
            self._create_network_bytes()
        return self._network_bytes
    if '-ft-' in native_kind and storage_kind in ('chunked', 'fulltext'):
        # Fulltext records are converted directly by a registered adapter,
        # which is created without a basis versioned file.
        make_adapter = adapter_registry.get((native_kind, 'fulltext'))
        text = make_adapter(None).get_bytes(self)
        if storage_kind == 'chunked':
            return [text]
        return text
    if self._knit is not None:
        # Not redundant with direct conversion above - that only handles
        # fulltext cases.
        if storage_kind == 'chunked':
            return self._knit.get_lines(self.key[0])
        if storage_kind == 'fulltext':
            return self._knit.get_text(self.key[0])
    raise errors.UnavailableRepresentation(self.key, storage_kind,
        native_kind)

321

322

323

class LazyKnitContentFactory(ContentFactory):
    """A ContentFactory producing either a full text or a wire form on demand.

    :seealso ContentFactory:
    """

    def __init__(self, key, parents, generator, first):
        """Create a LazyKnitContentFactory.

        :param key: The key of the record.
        :param parents: The parents of the record.
        :param generator: A _ContentMapGenerator containing the record for
            this key.
        :param first: Is this the first content object returned from
            generator? If it is, its storage kind is knit-delta-closure,
            otherwise it is knit-delta-closure-ref.
        """
        self._generator = generator
        self._first = first
        self.key = key
        self.parents = parents
        self.sha1 = None
        kind = "knit-delta-closure"
        if not first:
            kind = kind + "-ref"
        self.storage_kind = kind

    def get_bytes_as(self, storage_kind):
        """Return this record as storage_kind.

        :param storage_kind: This factory's own storage kind, 'chunked' or
            'fulltext'.
        :raises errors.UnavailableRepresentation: For any other
            storage_kind.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                # all the keys etc are contained in the bytes returned in
                # the first record.
                return ''
            return self._generator._wire_bytes()
        if storage_kind not in ('chunked', 'fulltext'):
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
        chunks = self._generator._get_one_work(self.key).text()
        if storage_kind == 'fulltext':
            return ''.join(chunks)
        return chunks

365

366

367

def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to an iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.
    :param bytes: The bytes of the record on the network.
    :param line_end: Handed, together with bytes, to the
        _NetworkContentMapGenerator that does the parsing.
    :return: Whatever that generator's get_record_stream() returns.
    """
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()

376

377

378

def knit_network_to_record(storage_kind, bytes, line_end):
    """Convert a network record to a record object.

    :param storage_kind: The storage kind of the record.
    :param bytes: The bytes of the record on the network.
    :param line_end: The offset in bytes at which the key line starts.
    :return: A list containing a single KnitContentFactory.
    """
    # First line: the key, its elements separated by NULs.
    key_start = line_end
    key_end = bytes.find('\n', key_start)
    key = tuple(bytes[key_start:key_end].split('\x00'))
    # Second line: either the literal 'None:' or tab separated parent keys.
    parents_start = key_end + 1
    parents_end = bytes.find('\n', parents_start)
    parent_line = bytes[parents_start:parents_end]
    if parent_line == 'None:':
        parents = None
    else:
        parents = tuple([tuple(segment.split('\x00'))
                         for segment in parent_line.split('\t')
                         if segment])
    # A single flag character follows; everything after it is the raw
    # record.
    flag_offset = parents_end + 1
    noeol = bytes[flag_offset] == 'N'
    method = 'line-delta'
    if 'ft' in storage_kind:
        method = 'fulltext'
    raw_record = bytes[flag_offset + 1:]
    annotated = 'annotated' in storage_kind
    record = KnitContentFactory(key, parents, (method, noeol), None,
        raw_record, annotated, network_bytes=bytes)
    return [record]

282

408

283

409

284

410

class KnitContent(object):

285

411

"""Content of a knit version to which deltas can be applied.

286

412

287

413

This is always stored in memory as a list of lines with \n at the end,

288

plus a flag saying if the final ending is really there or not, because that

414

plus a flag saying if the final ending is really there or not, because that

289

415

corresponds to the on-disk knit representation.

290

416

"""

291

417

380

506

381

507

class PlainKnitContent(KnitContent):

382

508

"""Unannotated content.

383

509

384

510

When annotate[_iter] is called on this content, the same version is reported

385

511

for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent

386

512

objects.

538

664

539

665

see parse_fulltext which this inverts.

540

666

"""

541

# TODO: jam 20070209 We only do the caching thing to make sure that

542

# the origin is a valid utf-8 line, eventually we could remove it

543

667

return ['%s %s' % (o, t) for o, t in content._lines]

544

668

545

669

def lower_line_delta(self, delta):

560

684

content = knit._get_content(key)

561

685

# adjust for the fact that serialised annotations are only key suffixes

562

686

# for this factory.

563

if type(key) == tuple:

687

if type(key) is tuple:

564

688

prefix = key[:-1]

565

689

origins = content.annotate()

566

690

result = []

632

756

633

757

def annotate(self, knit, key):

634

758

annotator = _KnitAnnotator(knit)

635

return annotator.annotate(key)

759

return annotator.annotate_flat(key)

636

760

637

761

638

762

641

765

642

766

This is only functional enough to run interface tests, it doesn't try to

643

767

provide a full pack environment.

644

768

645

769

:param annotated: knit annotations are wanted.

646

770

:param mapper: The mapper from keys to paths.

647

771

"""

657

781

658

782

This is only functional enough to run interface tests, it doesn't try to

659

783

provide a full pack environment.

660

784

661

785

:param graph: Store a graph.

662

786

:param delta: Delta compress contents.

663

787

:param keylength: How long should keys be.

694

818

versioned_files.writer.end()

695

819

696

820

821

def _get_total_build_size(self, keys, positions):

822

"""Determine the total bytes to build these keys.

823

824

(helper function because _KnitGraphIndex and _KndxIndex work the same, but

825

don't inherit from a common base.)

826

827

:param keys: Keys that we want to build

828

:param positions: dict of {key, (info, index_memo, comp_parent)} (such

829

as returned by _get_components_positions)

830

:return: Number of bytes to build those keys

831

"""

832

all_build_index_memos = {}

833

build_keys = keys

834

while build_keys:

835

next_keys = set()

836

for key in build_keys:

837

# This is mostly for the 'stacked' case

838

# Where we will be getting the data from a fallback

839

if key not in positions:

840

continue

841

_, index_memo, compression_parent = positions[key]

842

all_build_index_memos[key] = index_memo

843

if compression_parent not in all_build_index_memos:

844

next_keys.add(compression_parent)

845

build_keys = next_keys

846

return sum([index_memo[2] for index_memo

847

in all_build_index_memos.itervalues()])

848

849

697

850

class KnitVersionedFiles(VersionedFiles):

698

851

"""Storage for many versioned files using knit compression.

699

852

700

853

Backend storage is managed by indices and data objects.

701

854

702

:ivar _index: A _KnitGraphIndex or similar that can describe the

703

parents, graph, compression and data location of entries in this

704

KnitVersionedFiles. Note that this is only the index for

855

:ivar _index: A _KnitGraphIndex or similar that can describe the

856

parents, graph, compression and data location of entries in this

857

KnitVersionedFiles. Note that this is only the index for

705

858

*this* vfs; if there are fallbacks they must be queried separately.

706

859

"""

707

860

708

861

def __init__(self, index, data_access, max_delta_chain=200,

709

annotated=False):

862

annotated=False, reload_func=None):

710

863

"""Create a KnitVersionedFiles with index and data_access.

711

864

712

865

:param index: The index for the knit data.

716

869

insertion. Set to 0 to prohibit the use of deltas.

717

870

:param annotated: Set to True to cause annotations to be calculated and

718

871

stored during insertion.

872

:param reload_func: A function that can be called if we think we need

873

to reload the pack listing and try again. See

874

'bzrlib.repofmt.pack_repo.AggregateIndex' for the signature.

719

875

"""

720

876

self._index = index

721

877

self._access = data_access

725

881

else:

726

882

self._factory = KnitPlainFactory()

727

883

self._fallback_vfs = []

884

self._reload_func = reload_func

885

886

def __repr__(self):

887

return "%s(%r, %r)" % (

888

self.__class__.__name__,

889

self._index,

890

self._access)

728

891

729

892

def add_fallback_versioned_files(self, a_versioned_files):

730

893

"""Add a source of texts for texts not present in this knit.

744

907

# indexes can't directly store that, so we give them

745

908

# an empty tuple instead.

746

909

parents = ()

910

line_bytes = ''.join(lines)

747

911

return self._add(key, lines, parents,

748

parent_texts, left_matching_blocks, nostore_sha, random_id)

912

parent_texts, left_matching_blocks, nostore_sha, random_id,

913

line_bytes=line_bytes)

914

915

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):

916

"""See VersionedFiles._add_text()."""

917

self._index._check_write_ok()

918

self._check_add(key, None, random_id, check_content=False)

919

if text.__class__ is not str:

920

raise errors.BzrBadParameterUnicode("text")

921

if parents is None:

922

# The caller might pass None if there is no graph data, but kndx

923

# indexes can't directly store that, so we give them

924

# an empty tuple instead.

925

parents = ()

926

return self._add(key, None, parents,

927

None, None, nostore_sha, random_id,

928

line_bytes=text)

749

929

750

930

def _add(self, key, lines, parents, parent_texts,

751

left_matching_blocks, nostore_sha, random_id):

931

left_matching_blocks, nostore_sha, random_id,

932

line_bytes):

752

933

"""Add a set of lines on top of version specified by parents.

753

934

754

935

Any versions not present will be converted into ghosts.

936

937

:param lines: A list of strings where each one is a single line (has a

938

single newline at the end of the string) This is now optional

939

(callers can pass None). It is left in its location for backwards

940

compatibility. If given, ''.join(lines) must == line_bytes

941

:param line_bytes: A single string containing the content

942

943

We pass both lines and line_bytes because different routes bring the

944

values to this function. And for memory efficiency, we don't want to

945

have to split/join on-demand.

755

946

"""

756

947

# first thing, if the content is something we don't need to store, find

757

948

# that out.

758

line_bytes = ''.join(lines)

759

949

digest = sha_string(line_bytes)

760

950

if nostore_sha == digest:

761

951

raise errors.ExistingContent

763

953

present_parents = []

764

954

if parent_texts is None:

765

955

parent_texts = {}

766

# Do a single query to ascertain parent presence.

767

present_parent_map = self.get_parent_map(parents)

956

# Do a single query to ascertain parent presence; we only compress

957

# against parents in the same kvf.

958

present_parent_map = self._index.get_parent_map(parents)

768

959

for parent in parents:

769

960

if parent in present_parent_map:

770

961

present_parents.append(parent)

781

972

782

973

text_length = len(line_bytes)

783

974

options = []

784

if lines:

785

if lines[-1][-1] != '\n':

786

# copy the contents of lines.

975

no_eol = False

976

# Note: line_bytes is not modified to add a newline, that is tracked

977

# via the no_eol flag. 'lines' *is* modified, because that is the

978

# general values needed by the Content code.

979

if line_bytes and line_bytes[-1] != '\n':

980

options.append('no-eol')

981

no_eol = True

982

# Copy the existing list, or create a new one

983

if lines is None:

984

lines = osutils.split_lines(line_bytes)

985

else:

787

986

lines = lines[:]

788

options.append('no-eol')

789

lines[-1] = lines[-1] + '\n'

790

line_bytes += '\n'

987

# Replace the last line with one that ends in a final newline

988

lines[-1] = lines[-1] + '\n'

989

if lines is None:

990

lines = osutils.split_lines(line_bytes)

791

991

792

for element in key:

793

if type(element) != str:

992

for element in key[:-1]:

993

if type(element) is not str:

994

raise TypeError("key contains non-strings: %r" % (key,))

995

if key[-1] is None:

996

key = key[:-1] + ('sha1:' + digest,)

997

elif type(key[-1]) is not str:

794

998

raise TypeError("key contains non-strings: %r" % (key,))

795

999

# Knit hunks are still last-element only

796

1000

version_id = key[-1]

797

1001

content = self._factory.make(lines, version_id)

798

if 'no-eol' in options:

1002

if no_eol:

799

1003

# Hint to the content object that its text() call should strip the

800

1004

# EOL.

801

1005

content._should_strip_eol = True

813

1017

else:

814

1018

options.append('fulltext')

815

1019

# isinstance is slower and we have no hierarchy.

816

if self._factory.__class__ == KnitPlainFactory:

1020

if self._factory.__class__ is KnitPlainFactory:

817

1021

# Use the already joined bytes saving iteration time in

818

1022

# _record_to_data.

1023

dense_lines = [line_bytes]

1024

if no_eol:

1025

dense_lines.append('\n')

819

1026

size, bytes = self._record_to_data(key, digest,

820

lines, [line_bytes])

1027

lines, dense_lines)

821

1028

else:

822

1029

# get mixed annotation + content and feed it into the

823

1030

# serialiser.

835

1042

"""See VersionedFiles.annotate."""

836

1043

return self._factory.annotate(self, key)

837

1044

838

def check(self, progress_bar=None):

1045

def get_annotator(self):
    """Return a new _KnitAnnotator constructed around this object."""
    annotator = _KnitAnnotator(self)
    return annotator

1047

1048

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    :param progress_bar: Not referenced by this implementation.
    :param keys: None to run the logical check, otherwise the keys to
        request from get_record_stream.
    :return: The result of _logical_check() when keys is None, otherwise
        the result of get_record_stream(keys, 'unordered', True).
    """
    if keys is not None:
        # At the moment, check does no extra work over get_record_stream
        return self.get_record_stream(keys, 'unordered', True)
    return self._logical_check()

1055

1056

def _logical_check(self):

840

1057

# This doesn't actually test extraction of everything, but that will

841

1058

# impact 'bzr check' substantially, and needs to be integrated with

842

1059

# care. However, it does check for the obvious problem of a delta with

856

1073

def _check_add(self, key, lines, random_id, check_content):

857

1074

"""check that version_id and lines are safe to add."""

858

1075

version_id = key[-1]

859

if contains_whitespace(version_id):

860

raise InvalidRevisionId(version_id, self)

861

self.check_not_reserved_id(version_id)

1076

if version_id is not None:

1077

if contains_whitespace(version_id):

1078

raise InvalidRevisionId(version_id, self)

1079

self.check_not_reserved_id(version_id)

862

1080

# TODO: If random_id==False and the key is already present, we should

863

1081

# probably check that the existing content is identical to what is

864

1082

# being inserted, and otherwise raise an exception. This would make

874

1092

875

1093

def _check_header_version(self, rec, version_id):

876

1094

"""Checks the header version on original format knit records.

877

1095

878

1096

These have the last component of the key embedded in the record.

879

1097

"""

880

1098

if rec[1] != version_id:

895

1113

delta_size = 0

896

1114

fulltext_size = None

897

1115

for count in xrange(self._max_delta_chain):

898

# XXX: Collapse these two queries:

899

1116

try:

900

1117

# Note that this only looks in the index of this particular

901

1118

# KnitVersionedFiles, not in the fallbacks. This ensures that

902

1119

# we won't store a delta spanning physical repository

903

1120

# boundaries.

904

method = self._index.get_method(parent)

905

except RevisionNotPresent:

906

# Some basis is not locally present: always delta

1121

build_details = self._index.get_build_details([parent])

1122

parent_details = build_details[parent]

1123

except (RevisionNotPresent, KeyError), e:

1124

# Some basis is not locally present: always fulltext

907

1125

return False

908

index, pos, size = self._index.get_position(parent)

909

if method == 'fulltext':

1126

index_memo, compression_parent, _, _ = parent_details

1127

_, _, size = index_memo

1128

if compression_parent is None:

910

1129

fulltext_size = size

911

1130

break

912

1131

delta_size += size

913

1132

# We don't explicitly check for presence because this is in an

914

1133

# inner loop, and if it's missing it'll fail anyhow.

915

# TODO: This should be asking for compression parent, not graph

916

# parent.

917

parent = self._index.get_parent_map([parent])[parent][0]

1134

parent = compression_parent

918

1135

else:

919

1136

# We couldn't find a fulltext, so we must create a new one

920

1137

return False

960

1177

if missing and not allow_missing:

961

1178

raise errors.RevisionNotPresent(missing.pop(), self)

962

1179

return component_data

963

1180

964

1181

def _get_content(self, key, parent_texts={}):

965

1182

"""Returns a content object that makes up the specified

966

1183

version."""

970

1187

if not self.get_parent_map([key]):

971

1188

raise RevisionNotPresent(key, self)

972

1189

return cached_version

973

text_map, contents_map = self._get_content_maps([key])

974

return contents_map[key]

975

976

def _get_content_maps(self, keys, nonlocal_keys=None):

977

"""Produce maps of text and KnitContents

978

979

:param keys: The keys to produce content maps for.

980

:param nonlocal_keys: An iterable of keys(possibly intersecting keys)

981

which are known to not be in this knit, but rather in one of the

982

fallback knits.

983

:return: (text_map, content_map) where text_map contains the texts for

984

the requested versions and content_map contains the KnitContents.

985

"""

986

# FUTURE: This function could be improved for the 'extract many' case

987

# by tracking each component and only doing the copy when the number of

988

# children than need to apply delta's to it is > 1 or it is part of the

989

# final output.

990

keys = list(keys)

991

multiple_versions = len(keys) != 1

992

record_map = self._get_record_map(keys, allow_missing=True)

993

994

text_map = {}

995

content_map = {}

996

final_content = {}

997

if nonlocal_keys is None:

998

nonlocal_keys = set()

999

else:

1000

nonlocal_keys = frozenset(nonlocal_keys)

1001

missing_keys = set(nonlocal_keys)

1002

for source in self._fallback_vfs:

1003

if not missing_keys:

1004

break

1005

for record in source.get_record_stream(missing_keys,

1006

'unordered', True):

1007

if record.storage_kind == 'absent':

1008

continue

1009

missing_keys.remove(record.key)

1010

lines = split_lines(record.get_bytes_as('fulltext'))

1011

text_map[record.key] = lines

1012

content_map[record.key] = PlainKnitContent(lines, record.key)

1013

if record.key in keys:

1014

final_content[record.key] = content_map[record.key]

1015

for key in keys:

1016

if key in nonlocal_keys:

1017

# already handled

1018

continue

1019

components = []

1020

cursor = key

1021

while cursor is not None:

1022

try:

1023

record, record_details, digest, next = record_map[cursor]

1024

except KeyError:

1025

raise RevisionNotPresent(cursor, self)

1026

components.append((cursor, record, record_details, digest))

1027

cursor = next

1028

if cursor in content_map:

1029

# no need to plan further back

1030

components.append((cursor, None, None, None))

1031

break

1032

1033

content = None

1034

for (component_id, record, record_details,

1035

digest) in reversed(components):

1036

if component_id in content_map:

1037

content = content_map[component_id]

1038

else:

1039

content, delta = self._factory.parse_record(key[-1],

1040

record, record_details, content,

1041

copy_base_content=multiple_versions)

1042

if multiple_versions:

1043

content_map[component_id] = content

1044

1045

final_content[key] = content

1046

1047

# digest here is the digest from the last applied component.

1048

text = content.text()

1049

actual_sha = sha_strings(text)

1050

if actual_sha != digest:

1051

raise KnitCorrupt(self,

1052

'\n sha-1 %s'

1053

'\n of reconstructed text does not match'

1054

'\n expected %s'

1055

'\n for version %s' %

1056

(actual_sha, digest, key))

1057

text_map[key] = text

1058

return text_map, final_content

1190

generator = _VFContentMapGenerator(self, [key])

1191

return generator._get_content(key)

1192

1193

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    :param keys: The keys handed to the index's find_ancestry().
    :return: A KnownGraph built from the parent map find_ancestry()
        returned; the missing keys it also reports are not used.
    """
    parent_map, _missing_keys = self._index.find_ancestry(keys)
    return _mod_graph.KnownGraph(parent_map)

1059

1198

1060

1199

def get_parent_map(self, keys):

1061

1200

"""Get a map of the graph parents of keys.

1091

1230

1092

1231

def _get_record_map(self, keys, allow_missing=False):

1093

1232

"""Produce a dictionary of knit records.

1094

1233

1095

1234

:return: {key:(record, record_details, digest, next)}

1096

1235

record

1097

data returned from read_records

1236

data returned from read_records (a KnitContent object)

1098

1237

record_details

1099

1238

opaque information to pass to parse_record

1100

1239

digest

1103

1242

build-parent of the version, i.e. the leftmost ancestor.

1104

1243

Will be None if the record is not a delta.

1105

1244

:param keys: The keys to build a map for

1106

:param allow_missing: If some records are missing, rather than

1245

:param allow_missing: If some records are missing, rather than

1107

1246

error, just return the data that could be generated.

1108

1247

"""

1109

position_map = self._get_components_positions(keys,

1248

raw_map = self._get_record_map_unparsed(keys,

1110

1249

allow_missing=allow_missing)

1111

# key = component_id, r = record_details, i_m = index_memo, n = next

1112

records = [(key, i_m) for key, (r, i_m, n)

1113

in position_map.iteritems()]

1114

record_map = {}

1115

for key, record, digest in \

1116

self._read_records_iter(records):

1117

(record_details, index_memo, next) = position_map[key]

1118

record_map[key] = record, record_details, digest, next

1119

return record_map

1250

return self._raw_map_to_record_map(raw_map)

1251

1252

def _raw_map_to_record_map(self, raw_map):

1253

"""Parse the contents of _get_record_map_unparsed.

1254

1255

:return: see _get_record_map.

1256

"""

1257

result = {}

1258

for key in raw_map:

1259

data, record_details, next = raw_map[key]

1260

content, digest = self._parse_record(key[-1], data)

1261

result[key] = content, record_details, digest, next

1262

return result

1263

1264

def _get_record_map_unparsed(self, keys, allow_missing=False):

1265

"""Get the raw data for reconstructing keys without parsing it.

1266

1267

:return: A dict suitable for parsing via _raw_map_to_record_map.

1268

key-> raw_bytes, (method, noeol), compression_parent

1269

"""

1270

# This retries the whole request if anything fails. Potentially we

1271

# could be a bit more selective. We could track the keys whose records

1272

# we have successfully found, and then only request the new records

1273

# from there. However, _get_components_positions grabs the whole build

1274

# chain, which means we'll likely try to grab the same records again

1275

# anyway. Also, can the build chains change as part of a pack

1276

# operation? We wouldn't want to end up with a broken chain.

1277

while True:

1278

try:

1279

position_map = self._get_components_positions(keys,

1280

allow_missing=allow_missing)

1281

# key = component_id, r = record_details, i_m = index_memo,

1282

# n = next

1283

records = [(key, i_m) for key, (r, i_m, n)

1284

in position_map.iteritems()]

1285

# Sort by the index memo, so that we request records from the

1286

# same pack file together, and in forward-sorted order

1287

records.sort(key=operator.itemgetter(1))

1288

raw_record_map = {}

1289

for key, data in self._read_records_iter_unchecked(records):

1290

(record_details, index_memo, next) = position_map[key]

1291

raw_record_map[key] = data, record_details, next

1292

return raw_record_map

1293

except errors.RetryWithNewPacks, e:

1294

self._access.reload_or_raise(e)

1295

1296

@classmethod

1297

def _split_by_prefix(cls, keys):

1298

"""For the given keys, split them up based on their prefix.

1299

1300

To keep memory pressure somewhat under control, split the

1301

requests back into per-file-id requests, otherwise "bzr co"

1302

extracts the full tree into memory before writing it to disk.

1303

This should be revisited if _get_content_maps() can ever cross

1304

file-id boundaries.

1305

1306

The keys for a given file_id are kept in the same relative order.

1307

Ordering between file_ids is not, though prefix_order will return the

1308

order that the key was first seen.

1309

1310

:param keys: An iterable of key tuples

1311

:return: (split_map, prefix_order)

1312

split_map A dictionary mapping prefix => keys

1313

prefix_order The order that we saw the various prefixes

1314

"""

1315

split_by_prefix = {}

1316

prefix_order = []

1317

for key in keys:

1318

if len(key) == 1:

1319

prefix = ''

1320

else:

1321

prefix = key[0]

1322

1323

if prefix in split_by_prefix:

1324

split_by_prefix[prefix].append(key)

1325

else:

1326

split_by_prefix[prefix] = [key]

1327

prefix_order.append(prefix)

1328

return split_by_prefix, prefix_order

1329

1330

def _group_keys_for_io(self, keys, non_local_keys, positions,
                       _min_buffer_size=_STREAM_MIN_BUFFER_SIZE):
    """For the given keys, group them into 'best-sized' requests.

    The idea is to avoid making 1 request per file, but to never try to
    unpack an entire 1.5GB source tree in a single pass. Also when
    possible, we should try to group requests to the same pack file
    together.

    Keys are first split by prefix (see _split_by_prefix).  Whole prefix
    groups are then accumulated into a batch until the summed build size
    reported by self._index._get_total_build_size exceeds
    _min_buffer_size, at which point the batch is closed and a new one is
    started.  A prefix group is never split across batches.

    :param keys: An iterable of key tuples to be fetched.
    :param non_local_keys: An iterable of key tuples; those sharing a
        prefix with a batch are reported alongside that batch.
    :param positions: Passed through to
        self._index._get_total_build_size.
    :param _min_buffer_size: Size threshold above which the current batch
        is closed.
    :return: list of (keys, non_local) tuples that indicate what keys
        should be fetched next.
    """
    # TODO: Ideally we would group on 2 factors. We want to extract texts
    #       from the same pack file together, and we want to extract all
    #       the texts for a given build-chain together. Ultimately it
    #       probably needs a better global view.
    prefix_split_keys, prefix_order = self._split_by_prefix(keys)
    prefix_split_non_local_keys, _ = self._split_by_prefix(non_local_keys)
    cur_keys = []
    cur_non_local = set()
    cur_size = 0
    result = []
    for prefix in prefix_order:
        # Use a distinct name so the 'keys' parameter is not rebound.
        prefix_keys = prefix_split_keys[prefix]
        non_local = prefix_split_non_local_keys.get(prefix, [])

        cur_size += self._index._get_total_build_size(prefix_keys,
                                                      positions)
        cur_keys.extend(prefix_keys)
        cur_non_local.update(non_local)
        if cur_size > _min_buffer_size:
            result.append((cur_keys, cur_non_local))
            cur_keys = []
            cur_non_local = set()
            cur_size = 0
    if cur_keys:
        # Flush the final, under-sized batch.
        result.append((cur_keys, cur_non_local))
    return result

1120

1372

1121

1373

def get_record_stream(self, keys, ordering, include_delta_closure):

1122

1374

"""Get a stream of records for keys.

1135

1387

if not keys:

1136

1388

return

1137

1389

if not self._index.has_graph:

1138

# Cannot topological order when no graph has been stored.

1390

# Cannot sort when no graph has been stored.

1139

1391

ordering = 'unordered'

1392

1393

remaining_keys = keys

1394

while True:

1395

try:

1396

keys = set(remaining_keys)

1397

for content_factory in self._get_remaining_record_stream(keys,

1398

ordering, include_delta_closure):

1399

remaining_keys.discard(content_factory.key)

1400

yield content_factory

1401

return

1402

except errors.RetryWithNewPacks, e:

1403

self._access.reload_or_raise(e)

1404

1405

def _get_remaining_record_stream(self, keys, ordering,

1406

include_delta_closure):

1407

"""This function is the 'retry' portion for get_record_stream."""

1140

1408

if include_delta_closure:

1141

1409

positions = self._get_components_positions(keys, allow_missing=True)

1142

1410

else:

1148

1416

absent_keys = keys.difference(set(positions))

1149

1417

# There may be more absent keys : if we're missing the basis component

1150

1418

# and are trying to include the delta closure.

1419

# XXX: We should not ever need to examine remote sources because we do

1420

# not permit deltas across versioned files boundaries.

1151

1421

if include_delta_closure:

1152

1422

needed_from_fallback = set()

1153

1423

# Build up reconstructable_keys dict. key:True in this dict means

1179

1449

needed_from_fallback.add(key)

1180

1450

# Double index lookups here : need a unified api ?

1181

1451

global_map, parent_maps = self._get_parent_map_with_sources(keys)

1182

if ordering == 'topological':

1183

# Global topological sort

1184

present_keys = tsort.topo_sort(global_map)

1452

if ordering in ('topological', 'groupcompress'):

1453

if ordering == 'topological':

1454

# Global topological sort

1455

present_keys = tsort.topo_sort(global_map)

1456

else:

1457

present_keys = sort_groupcompress(global_map)

1185

1458

# Now group by source:

1186

1459

source_keys = []

1187

1460

current_source = None

1197

1470

else:

1198

1471

if ordering != 'unordered':

1199

1472

raise AssertionError('valid values for ordering are:'

1200

' "unordered" or "topological" not: %r'

1473

' "unordered", "groupcompress" or "topological" not: %r'

1201

1474

% (ordering,))

1202

1475

# Just group by source; remote sources first.

1203

1476

present_keys = []

1207

1480

for key in parent_map:

1208

1481

present_keys.append(key)

1209

1482

source_keys[-1][1].append(key)

1483

# We have been requested to return these records in an order that

1484

# suits us. So we ask the index to give us an optimally sorted

1485

# order.

1486

for source, sub_keys in source_keys:

1487

if source is parent_maps[0]:

1488

# Only sort the keys for this VF

1489

self._index._sort_keys_by_io(sub_keys, positions)

1210

1490

absent_keys = keys - set(global_map)

1211

1491

for key in absent_keys:

1212

1492

yield AbsentContentFactory(key)

1217

1497

if include_delta_closure:

1218

1498

# XXX: get_content_maps performs its own index queries; allow state

1219

1499

# to be passed in.

1220

text_map, _ = self._get_content_maps(present_keys,

1221

needed_from_fallback - absent_keys)

1222

for key in present_keys:

1223

yield FulltextContentFactory(key, global_map[key], None,

1224

''.join(text_map[key]))

1500

non_local_keys = needed_from_fallback - absent_keys

1501

for keys, non_local_keys in self._group_keys_for_io(present_keys,

1502

non_local_keys,

1503

positions):

1504

generator = _VFContentMapGenerator(self, keys, non_local_keys,

1505

global_map,

1506

ordering=ordering)

1507

for record in generator.get_record_stream():

1508

yield record

1225

1509

else:

1226

1510

for source, keys in source_keys:

1227

1511

if source is parent_maps[0]:

1259

1543

def insert_record_stream(self, stream):

1260

1544

"""Insert a record stream into this container.

1261

1545

1262

:param stream: A stream of records to insert.

1546

:param stream: A stream of records to insert.

1263

1547

:return: None

1264

1548

:seealso VersionedFiles.get_record_stream:

1265

1549

"""

1271

1555

adapter = adapter_factory(self)

1272

1556

adapters[adapter_key] = adapter

1273

1557

return adapter

1558

delta_types = set()

1274

1559

if self._factory.annotated:

1275

1560

# self is annotated, we need annotated knits to use directly.

1276

1561

annotated = "annotated-"

1280

1565

annotated = ""

1281

1566

convertibles = set(["knit-annotated-ft-gz"])

1282

1567

if self._max_delta_chain:

1568

delta_types.add("knit-annotated-delta-gz")

1283

1569

convertibles.add("knit-annotated-delta-gz")

1284

1570

# The set of types we can cheaply adapt without needing basis texts.

1285

1571

native_types = set()

1286

1572

if self._max_delta_chain:

1287

1573

native_types.add("knit-%sdelta-gz" % annotated)

1574

delta_types.add("knit-%sdelta-gz" % annotated)

1288

1575

native_types.add("knit-%sft-gz" % annotated)

1289

1576

knit_types = native_types.union(convertibles)

1290

1577

adapters = {}

1294

1581

# can't generate annotations from new deltas until their basis parent

1295

1582

# is present anyway, so we get away with not needing an index that

1296

1583

# includes the new keys.

1584

1585

# See <http://launchpad.net/bugs/300177> about ordering of compression

1586

# parents in the records - to be conservative, we insist that all

1587

# parents must be present to avoid expanding to a fulltext.

1588

1297

1589

# key = basis_parent, value = index entry to add

1298

1590

buffered_index_entries = {}

1299

1591

for record in stream:

1592

buffered = False

1300

1593

parents = record.parents

1594

if record.storage_kind in delta_types:

1595

# TODO: eventually the record itself should track

1596

# compression_parent

1597

compression_parent = parents[0]

1598

else:

1599

compression_parent = None

1301

1600

# Raise an error when a record is missing.

1302

1601

if record.storage_kind == 'absent':

1303

1602

raise RevisionNotPresent([record.key], self)

1304

if record.storage_kind in knit_types:

1603

elif ((record.storage_kind in knit_types)

1604

and (compression_parent is None

1605

or not self._fallback_vfs

1606

or self._index.has_key(compression_parent)

1607

or not self.has_key(compression_parent))):

1608

# we can insert the knit record literally if either it has no

1609

# compression parent OR we already have its basis in this kvf

1610

# OR the basis is not present even in the fallbacks. In the

1611

# last case it will either turn up later in the stream and all

1612

# will be well, or it won't turn up at all and we'll raise an

1613

# error at the end.

1614

1615

# TODO: self.has_key is somewhat redundant with

1616

# self._index.has_key; we really want something that directly

1617

# asks if it's only present in the fallbacks. -- mbp 20081119

1305

1618

if record.storage_kind not in native_types:

1306

1619

try:

1307

1620

adapter_key = (record.storage_kind, "knit-delta-gz")

1309

1622

except KeyError:

1310

1623

adapter_key = (record.storage_kind, "knit-ft-gz")

1311

1624

adapter = get_adapter(adapter_key)

1312

bytes = adapter.get_bytes(

1313

record, record.get_bytes_as(record.storage_kind))

1625

bytes = adapter.get_bytes(record)

1314

1626

else:

1315

bytes = record.get_bytes_as(record.storage_kind)

1627

# It's a knit record, it has a _raw_record field (even if

1628

# it was reconstituted from a network stream).

1629

bytes = record._raw_record

1316

1630

options = [record._build_details[0]]

1317

1631

if record._build_details[1]:

1318

1632

options.append('no-eol')

1327

1641

access_memo = self._access.add_raw_records(

1328

1642

[(record.key, len(bytes))], bytes)[0]

1329

1643

index_entry = (record.key, options, access_memo, parents)

1330

buffered = False

1331

1644

if 'fulltext' not in options:

1332

basis_parent = parents[0]

1645

# Not a fulltext, so we need to make sure the compression

1646

# parent will also be present.

1333

1647

# Note that pack backed knits don't need to buffer here

1334

1648

# because they buffer all writes to the transaction level,

1335

1649

# but we don't expose that difference at the index level. If

1336

1650

# the query here has sufficient cost to show up in

1337

1651

# profiling we should do that.

1338

if basis_parent not in self.get_parent_map([basis_parent]):

1652

1653

# They're required to be physically in this

1654

# KnitVersionedFiles, not in a fallback.

1655

if not self._index.has_key(compression_parent):

1339

1656

pending = buffered_index_entries.setdefault(

1340

basis_parent, [])

1657

compression_parent, [])

1341

1658

pending.append(index_entry)

1342

1659

buffered = True

1343

1660

if not buffered:

1344

1661

self._index.add_records([index_entry])

1345

elif record.storage_kind == 'fulltext':

1662

elif record.storage_kind == 'chunked':

1346

1663

self.add_lines(record.key, parents,

1347

split_lines(record.get_bytes_as('fulltext')))

1664

osutils.chunks_to_lines(record.get_bytes_as('chunked')))

1348

1665

else:

1349

adapter_key = record.storage_kind, 'fulltext'

1350

adapter = get_adapter(adapter_key)

1351

lines = split_lines(adapter.get_bytes(

1352

record, record.get_bytes_as(record.storage_kind)))

1666

# Not suitable for direct insertion as a

1667

# delta, either because it's not the right format, or this

1668

# KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==

1669

# 0) or because it depends on a base only present in the

1670

# fallback kvfs.

1671

self._access.flush()

1672

try:

1673

# Try getting a fulltext directly from the record.

1674

bytes = record.get_bytes_as('fulltext')

1675

except errors.UnavailableRepresentation:

1676

adapter_key = record.storage_kind, 'fulltext'

1677

adapter = get_adapter(adapter_key)

1678

bytes = adapter.get_bytes(record)

1679

lines = split_lines(bytes)

1353

1680

try:

1354

1681

self.add_lines(record.key, parents, lines)

1355

1682

except errors.RevisionAlreadyPresent:

1356

1683

pass

1357

1684

# Add any records whose basis parent is now available.

1358

added_keys = [record.key]

1359

while added_keys:

1360

key = added_keys.pop(0)

1361

if key in buffered_index_entries:

1362

index_entries = buffered_index_entries[key]

1363

self._index.add_records(index_entries)

1364

added_keys.extend(

1365

[index_entry[0] for index_entry in index_entries])

1366

del buffered_index_entries[key]

1367

# If there were any deltas which had a missing basis parent, error.

1685

if not buffered:

1686

added_keys = [record.key]

1687

while added_keys:

1688

key = added_keys.pop(0)

1689

if key in buffered_index_entries:

1690

index_entries = buffered_index_entries[key]

1691

self._index.add_records(index_entries)

1692

added_keys.extend(

1693

[index_entry[0] for index_entry in index_entries])

1694

del buffered_index_entries[key]

1368

1695

if buffered_index_entries:

1369

raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],

1370

self)

1696

# There were index entries buffered at the end of the stream,

1697

# So these need to be added (if the index supports holding such

1698

# entries for later insertion)

1699

for key in buffered_index_entries:

1700

index_entries = buffered_index_entries[key]

1701

self._index.add_records(index_entries,

1702

missing_compression_parents=True)

1703

1704

def get_missing_compression_parent_keys(self):
    """Report the compression parents that are still missing.

    Call this once insert_record_stream has finished to learn whether any
    compression parents were not supplied. When some are missing, the
    records that depend on them cannot be inserted safely. For atomic
    KnitVersionedFiles built on packs, the transaction should be aborted
    or suspended - commit will fail at this point. Nonatomic knits will
    error earlier because they have no staging area to put pending
    entries into.

    :return: An iterable of keys of missing compression parents, exactly
        as given by the index.
    """
    index = self._index
    return index.get_missing_compression_parents()

1371

1715

1372

1716

def iter_lines_added_or_present_in_keys(self, keys, pb=None):

1373

1717

"""Iterate over the lines in the versioned files from keys.

1384

1728

is an iterator).

1385

1729

1386

1730

NOTES:

1387

* Lines are normalised by the underlying store: they will all have \n

1731

* Lines are normalised by the underlying store: they will all have \\n

1388

1732

terminators.

1389

1733

* Lines are returned in arbitrary order.

1734

* If a requested key did not change any lines (or didn't have any

1735

lines), it may not be mentioned at all in the result.

1390

1736

1737

:param pb: Progress bar supplied by caller.

1391

1738

:return: An iterator over (line, key).

1392

1739

"""

1393

1740

if pb is None:

1394

1741

pb = progress.DummyProgress()

1395

1742

keys = set(keys)

1396

1743

total = len(keys)

1397

# we don't care about inclusions, the caller cares.

1398

# but we need to setup a list of records to visit.

1399

# we need key, position, length

1400

key_records = []

1401

build_details = self._index.get_build_details(keys)

1402

for key, details in build_details.iteritems():

1403

if key in keys:

1404

key_records.append((key, details[0]))

1405

keys.remove(key)

1406

records_iter = enumerate(self._read_records_iter(key_records))

1407

for (key_idx, (key, data, sha_value)) in records_iter:

1408

pb.update('Walking content.', key_idx, total)

1409

compression_parent = build_details[key][1]

1410

if compression_parent is None:

1411

# fulltext

1412

line_iterator = self._factory.get_fulltext_content(data)

1413

else:

1414

# Delta

1415

line_iterator = self._factory.get_linedelta_content(data)

1416

# XXX: It might be more efficient to yield (key,

1417

# line_iterator) in the future. However for now, this is a simpler

1418

# change to integrate into the rest of the codebase. RBC 20071110

1419

for line in line_iterator:

1420

yield line, key

1744

done = False

1745

while not done:

1746

try:

1747

# we don't care about inclusions, the caller cares.

1748

# but we need to setup a list of records to visit.

1749

# we need key, position, length

1750

key_records = []

1751

build_details = self._index.get_build_details(keys)

1752

for key, details in build_details.iteritems():

1753

if key in keys:

1754

key_records.append((key, details[0]))

1755

records_iter = enumerate(self._read_records_iter(key_records))

1756

for (key_idx, (key, data, sha_value)) in records_iter:

1757

pb.update('Walking content', key_idx, total)

1758

compression_parent = build_details[key][1]

1759

if compression_parent is None:

1760

# fulltext

1761

line_iterator = self._factory.get_fulltext_content(data)

1762

else:

1763

# Delta

1764

line_iterator = self._factory.get_linedelta_content(data)

1765

# Now that we are yielding the data for this key, remove it

1766

# from the list

1767

keys.remove(key)

1768

# XXX: It might be more efficient to yield (key,

1769

# line_iterator) in the future. However for now, this is a

1770

# simpler change to integrate into the rest of the

1771

# codebase. RBC 20071110

1772

for line in line_iterator:

1773

yield line, key

1774

done = True

1775

except errors.RetryWithNewPacks, e:

1776

self._access.reload_or_raise(e)

1777

# If there are still keys we've not yet found, we look in the fallback

1778

# vfs, and hope to find them there. Note that if the keys are found

1779

# but had no changes or no content, the fallback may not return

1780

# anything.

1781

if keys and not self._fallback_vfs:

1782

# XXX: strictly the second parameter is meant to be the file id

1783

# but it's not easily accessible here.

1784

raise RevisionNotPresent(keys, repr(self))

1421

1785

for source in self._fallback_vfs:

1422

1786

if not keys:

1423

1787

break

1426

1790

source_keys.add(key)

1427

1791

yield line, key

1428

1792

keys.difference_update(source_keys)

1429

if keys:

1430

raise RevisionNotPresent(keys, self.filename)

1431

pb.update('Walking content.', total, total)

1793

pb.update('Walking content', total, total)

1432

1794

1433

1795

def _make_line_delta(self, delta_seq, new_content):

1434

1796

"""Generate a line delta from delta_seq and new_content."""

1443

1805

delta=None, annotated=None,

1444

1806

left_matching_blocks=None):

1445

1807

"""Merge annotations for content and generate deltas.

1446

1808

1447

1809

This is done by comparing the annotations based on changes to the text

1448

1810

and generating a delta on the resulting full texts. If annotations are

1449

1811

not being created then a simple delta is created.

1531

1893

rec[1], record_contents))

1532

1894

if last_line != 'end %s\n' % rec[1]:

1533

1895

raise KnitCorrupt(self,

1534

'unexpected version end line %r, wanted %r'

1896

'unexpected version end line %r, wanted %r'

1535

1897

% (last_line, rec[1]))

1536

1898

df.close()

1537

1899

return rec, record_contents

1554

1916

if not needed_records:

1555

1917

return

1556

1918

1557

# The transport optimizes the fetching as well

1919

# The transport optimizes the fetching as well

1558

1920

# (ie, reads continuous ranges.)

1559

1921

raw_data = self._access.get_raw_records(

1560

1922

[index_memo for key, index_memo in needed_records])

1570

1932

This unpacks enough of the text record to validate the id is

1571

1933

as expected but thats all.

1572

1934

1573

Each item the iterator yields is (key, bytes, sha1_of_full_text).

1935

Each item the iterator yields is (key, bytes,

1936

expected_sha1_of_full_text).

1937

"""

1938

for key, data in self._read_records_iter_unchecked(records):

1939

# validate the header (note that we can only use the suffix in

1940

# current knit records).

1941

df, rec = self._parse_record_header(key, data)

1942

df.close()

1943

yield key, data, rec[3]

1944

1945

def _read_records_iter_unchecked(self, records):

1946

"""Read text records from data file and yield raw data.

1947

1948

No validation is done.

1949

1950

Yields tuples of (key, data).

1574

1951

"""

1575

1952

# setup an iterator of the external records:

1576

1953

# uses readv so nice and fast we hope.

1582

1959

1583

1960

for key, index_memo in records:

1584

1961

data = raw_records.next()

1585

# validate the header (note that we can only use the suffix in

1586

# current knit records).

1587

df, rec = self._parse_record_header(key, data)

1588

df.close()

1589

yield key, data, rec[3]

1962

yield key, data

1590

1963

1591

1964

def _record_to_data(self, key, digest, lines, dense_lines=None):

1592

1965

"""Convert key, digest, lines into a raw data block.

1593

1966

1594

1967

:param key: The key of the record. Currently keys are always serialised

1595

1968

using just the trailing component.

1596

1969

:param dense_lines: The bytes of lines but in a denser form. For

1601

1974

function spends less time resizing the final string.

1602

1975

:return: (len, a StringIO instance with the raw data ready to read.)

1603

1976

"""

1604

# Note: using a string copy here increases memory pressure with e.g.

1605

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

1606

# when doing the initial commit of a mozilla tree. RBC 20070921

1607

bytes = ''.join(chain(

1608

["version %s %d %s\n" % (key[-1],

1609

len(lines),

1610

digest)],

1611

dense_lines or lines,

1612

["end %s\n" % key[-1]]))

1613

if type(bytes) != str:

1614

raise AssertionError(

1615

'data must be plain bytes was %s' % type(bytes))

1977

chunks = ["version %s %d %s\n" % (key[-1], len(lines), digest)]

1978

chunks.extend(dense_lines or lines)

1979

chunks.append("end %s\n" % key[-1])

1980

for chunk in chunks:

1981

if type(chunk) is not str:

1982

raise AssertionError(

1983

'data must be plain bytes was %s' % type(chunk))

1616

1984

if lines and lines[-1][-1] != '\n':

1617

1985

raise ValueError('corrupt lines value %r' % lines)

1618

compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)

1986

compressed_bytes = tuned_gzip.chunks_to_gzip(chunks)

1619

1987

return len(compressed_bytes), compressed_bytes

1620

1988

1621

1989

def _split_header(self, line):

1636

2004

return result

1637

2005

1638

2006

2007

class _ContentMapGenerator(object):
    """Generate texts or expose raw deltas for a set of texts.

    __init__ here only records the requested ordering.  The methods below
    additionally read self.keys, self.nonlocal_keys, self.global_map,
    self.vf, self._raw_record_map, self._record_map, self._contents_map
    and self._factory, so whoever constructs an instance has to provide
    those attributes.
    """

    def __init__(self, ordering='unordered'):
        """Remember the ordering to request from fallback record streams.

        :param ordering: Stored as self._ordering and passed to the
            fallbacks' get_record_stream in _work.
        """
        self._ordering = ordering

    def _get_content(self, key):
        """Get the content object for key.

        :param key: The key to return content for.
        :return: A PlainKnitContent built from the first streamed record
            when key is in self.nonlocal_keys, otherwise the result of
            _get_one_work(key).
        """
        # Note that _get_content is only called when the _ContentMapGenerator
        # has been constructed with just one key requested for reconstruction.
        if key in self.nonlocal_keys:
            record = self.get_record_stream().next()
            # Create a content object on the fly
            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            return PlainKnitContent(lines, record.key)
        else:
            # local keys we can ask for directly
            return self._get_one_work(key)

    def get_record_stream(self):
        """Get a record stream for the keys requested during __init__."""
        for record in self._work():
            yield record

    def _work(self):
        """Yield the records for the requested keys.

        This is a generator.  It first asks each of self.vf._fallback_vfs
        in turn for the nonlocal keys that have not been found yet and
        yields every record that is not 'absent'.  It then yields one
        LazyKnitContentFactory for each key in self.keys that is not in
        self.nonlocal_keys; only the first of those is created with
        first=True.

        self.global_map is filled in from self.vf.get_parent_map(self.keys)
        when it is None.

        :raises AssertionError: if self._raw_record_map is None once the
            fallbacks have been consulted.
        """
        # NB: By definition we never need to read remote sources unless texts
        # are requested from them: we don't delta across stores - and we
        # explicitly do not want to, to prevent data loss situations.
        if self.global_map is None:
            self.global_map = self.vf.get_parent_map(self.keys)
        nonlocal_keys = self.nonlocal_keys

        # Work on a copy: keys are removed as the fallbacks supply them.
        missing_keys = set(nonlocal_keys)
        # Read from remote versioned file instances and provide to our caller.
        for source in self.vf._fallback_vfs:
            if not missing_keys:
                break
            # Loop over fallback repositories asking them for texts - ignore
            # any missing from a particular fallback.
            for record in source.get_record_stream(missing_keys,
                self._ordering, True):
                if record.storage_kind == 'absent':
                    # Not in this particular stream, may be in one of the
                    # other fallback vfs objects.
                    continue
                missing_keys.remove(record.key)
                yield record

        if self._raw_record_map is None:
            raise AssertionError('_raw_record_map should have been filled')
        first = True
        for key in self.keys:
            if key in self.nonlocal_keys:
                continue
            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
            first = False

    def _get_one_work(self, requested_key):
        """Return the content object for requested_key.

        A cached entry in self._contents_map is returned directly.
        Otherwise self._raw_record_map is parsed (once, into
        self._record_map) and every key in self.keys that is not in
        self.nonlocal_keys is built: its chain of components is collected
        by following the 'next' field of each record, the components are
        applied base-first through self._factory.parse_record, and the sha
        of the resulting text is compared with the recorded digest.

        Built contents are cached in self._contents_map only when more
        than one key was requested; with a single key the content that
        was just built is returned without being cached.

        :param requested_key: The key whose content is wanted.
        :raises RevisionNotPresent: if a component of a build chain is not
            in the record map.
        :raises SHA1KnitCorrupt: if a built text does not match its digest.
        """
        # Now, if we have calculated everything already, just return the
        # desired text.
        if requested_key in self._contents_map:
            return self._contents_map[requested_key]
        # To simplify things, parse everything at once - code that wants one text
        # probably wants them all.
        # FUTURE: This function could be improved for the 'extract many' case
        # by tracking each component and only doing the copy when the number of
        # children that need to apply deltas to it is > 1 or it is part of the
        # final output.
        multiple_versions = len(self.keys) != 1
        if self._record_map is None:
            self._record_map = self.vf._raw_map_to_record_map(
                self._raw_record_map)
        record_map = self._record_map
        # record_map is key: (record, record_details, digest, next)
        # Have read and parsed records at this point.
        for key in self.keys:
            if key in self.nonlocal_keys:
                # already handled
                continue
            components = []
            cursor = key
            # Walk back along the compression chain until a fulltext
            # (next is None) or an already-built content is reached.
            while cursor is not None:
                try:
                    record, record_details, digest, next = record_map[cursor]
                except KeyError:
                    raise RevisionNotPresent(cursor, self)
                components.append((cursor, record, record_details, digest))
                cursor = next
                if cursor in self._contents_map:
                    # no need to plan further back
                    components.append((cursor, None, None, None))
                    break

            content = None
            # components was collected newest-first; apply it base-first.
            for (component_id, record, record_details,
                 digest) in reversed(components):
                if component_id in self._contents_map:
                    content = self._contents_map[component_id]
                else:
                    content, delta = self._factory.parse_record(key[-1],
                        record, record_details, content,
                        copy_base_content=multiple_versions)
                    if multiple_versions:
                        self._contents_map[component_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            actual_sha = sha_strings(text)
            if actual_sha != digest:
                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
        if multiple_versions:
            return self._contents_map[requested_key]
        else:
            # NOTE(review): relies on the single requested key being local,
            # otherwise 'content' is never bound - confirm against callers.
            return content

    def _wire_bytes(self):
        """Get the bytes to put on the wire for this generator's keys.

        The result is a sequence of newline separated fields:

        * the kind marker 'knit-delta-closure',
        * 'annotated' or an empty line, depending on
          self.vf._factory.annotated,
        * the keys in self.keys that are not in self.nonlocal_keys, tab
          separated, each key's elements joined with NUL,
        * the serialised self._raw_record_map, one entry after another
          (the layout of an entry is described in the body below).

        NOTE(review): the previous docstring said subsequent keys use the
        storage kind 'knit-delta-closure-ref'; this method itself only
        ever writes the 'knit-delta-closure' marker.

        :return: Bytes to put on the wire.
        """
        lines = []
        # kind marker for dispatch on the far side,
        lines.append('knit-delta-closure')
        # Annotated or not
        if self.vf._factory.annotated:
            lines.append('annotated')
        else:
            lines.append('')
        # then the list of keys
        lines.append('\t'.join(['\x00'.join(key) for key in self.keys
            if key not in self.nonlocal_keys]))
        # then the _raw_record_map in serialised form:
        map_byte_list = []
        # for each item in the map:
        # 1 line with key
        # 1 line with parents if the key is to be yielded (None: for None, '' for ())
        # one line with method
        # one line with noeol
        # one line with next ('' for None)
        # one line with byte count of the record bytes
        # the record bytes
        for key, (record_bytes, (method, noeol), next) in \
            self._raw_record_map.iteritems():
            key_bytes = '\x00'.join(key)
            # Keys absent from global_map are serialised as 'None:'.
            parents = self.global_map.get(key, None)
            if parents is None:
                parent_bytes = 'None:'
            else:
                parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
            method_bytes = method
            if noeol:
                noeol_bytes = "T"
            else:
                noeol_bytes = "F"
            if next:
                next_bytes = '\x00'.join(next)
            else:
                next_bytes = ''
            map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
                key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
                len(record_bytes), record_bytes))
        map_bytes = ''.join(map_byte_list)
        lines.append(map_bytes)
        bytes = '\n'.join(lines)
        return bytes

2184

2185

2186

class _VFContentMapGenerator(_ContentMapGenerator):
    """Content map generator reading from a VersionedFiles object."""

    def __init__(self, versioned_files, keys, nonlocal_keys=None,
        global_map=None, raw_record_map=None, ordering='unordered'):
        """Create a _ContentMapGenerator.

        :param versioned_files: The versioned files that the texts are being
            extracted from.
        :param keys: The keys to produce content maps for.  Any iterable is
            accepted; it is copied into a list exactly once.
        :param nonlocal_keys: An iterable of keys(possibly intersecting keys)
            which are known to not be in this knit, but rather in one of the
            fallback knits.
        :param global_map: The result of get_parent_map(keys) (or a supermap).
            This is required if get_record_stream() is to be used.
        :param raw_record_map: A unparsed raw record map to use for answering
            contents.  When None it is read from versioned_files.
        :param ordering: The ordering handed to _ContentMapGenerator.__init__.
        """
        _ContentMapGenerator.__init__(self, ordering=ordering)
        # The vf to source data from
        self.vf = versioned_files
        # The keys desired
        self.keys = list(keys)
        # Keys known to be in fallback vfs objects
        if nonlocal_keys is None:
            self.nonlocal_keys = set()
        else:
            self.nonlocal_keys = frozenset(nonlocal_keys)
        # Parents data for keys to be returned in get_record_stream
        self.global_map = global_map
        # The chunked lists for self.keys in text form
        self._text_map = {}
        # A cache of KnitContent objects used in extracting texts.
        self._contents_map = {}
        # All the knit records needed to assemble the requested keys as full
        # texts.
        self._record_map = None
        if raw_record_map is None:
            # Use the materialised list: 'keys' may be a one-shot iterator
            # that list(keys) above has already exhausted.
            self._raw_record_map = self.vf._get_record_map_unparsed(
                self.keys, allow_missing=True)
        else:
            self._raw_record_map = raw_record_map
        # the factory for parsing records
        self._factory = self.vf._factory

2230

2231

2232

class _NetworkContentMapGenerator(_ContentMapGenerator):
    """Content map generator whose records are parsed from a byte block."""

    def __init__(self, bytes, line_end):
        """Parse a serialised bytes block into the generator's maps.

        :param bytes: The complete serialised block. It is retained unchanged
            so that _wire_bytes() can return it.
        :param line_end: Offset within bytes at which parsing begins.
        """
        def read_line(offset):
            # Return (text up to the next newline, offset just past it).
            newline = bytes.find('\n', offset)
            return bytes[offset:newline], newline + 1

        def parse_key(text):
            # Key elements are separated by NUL bytes.
            return tuple(text.split('\x00'))

        def parse_key_list(text):
            # Keys are tab separated; empty segments are dropped.
            return [parse_key(segment) for segment in text.split('\t')
                    if segment]

        self._bytes = bytes
        self.nonlocal_keys = []
        self._record_map = None
        self._contents_map = {}
        self._raw_record_map = {}
        self.global_map = {}
        # Get access to record parsing facilities
        self.vf = KnitVersionedFiles(None, None)

        cursor = line_end
        # First line: annotated or not.
        flavour, cursor = read_line(cursor)
        if flavour == 'annotated':
            self._factory = KnitAnnotateFactory()
        else:
            self._factory = KnitPlainFactory()
        # Second line: the keys to emit in get_record_stream.
        key_line, cursor = read_line(cursor)
        self.keys = parse_key_list(key_line)
        # The remainder is a sequence of records, read until the block ends.
        # XXX: It would be nice if this was just a bunch of the same records
        # as get_record_stream(..., False) gives, but there is a decent sized
        # gap stopping that at the moment.
        total = len(bytes)
        while cursor < total:
            # One line with the record key.
            key_text, cursor = read_line(cursor)
            key = parse_key(key_text)
            # One line with parents ('None:' for None, '' for ()).
            parents_text, cursor = read_line(cursor)
            if parents_text == 'None:':
                parents = None
            else:
                parents = tuple(parse_key_list(parents_text))
            self.global_map[key] = parents
            # One line with the method.
            method, cursor = read_line(cursor)
            # One line with the noeol flag.
            noeol_text, cursor = read_line(cursor)
            noeol = noeol_text == "T"
            # One line with next ('' for None).
            next_text, cursor = read_line(cursor)
            if next_text:
                next_key = parse_key(next_text)
            else:
                next_key = None
            # One line with the byte count of the record bytes.
            count_text, cursor = read_line(cursor)
            count = int(count_text)
            # The record bytes themselves follow immediately.
            record_end = cursor + count
            record_bytes = bytes[cursor:record_end]
            cursor = record_end
            # Put it in the map.
            self._raw_record_map[key] = (
                record_bytes, (method, noeol), next_key)

    def get_record_stream(self):
        """Get a record stream for the keys requested by the bytestream."""
        for position, key in enumerate(self.keys):
            # Only the first factory yielded is flagged as first.
            yield LazyKnitContentFactory(
                key, self.global_map[key], self, position == 0)

    def _wire_bytes(self):
        """Return the bytes block this generator was constructed from."""
        return self._bytes

2319

1639

2320

1640

2321

class _KndxIndex(object):

1641

2322

"""Manages knit index files

1656

2337

1657

2338

Duplicate entries may be written to the index for a single version id

1658

2339

if this is done then the latter one completely replaces the former:

1659

this allows updates to correct version and parent information.

2340

this allows updates to correct version and parent information.

1660

2341

Note that the two entries may share the delta, and that successive

1661

2342

annotations and references MUST point to the first entry.

1662

2343

1663

2344

The index file on disc contains a header, followed by one line per knit

1664

2345

record. The same revision can be present in an index file more than once.

1665

The first occurrence gets assigned a sequence number starting from 0.

1666

2346

The first occurrence gets assigned a sequence number starting from 0.

2347

1667

2348

The format of a single line is

1668

2349

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

1669

2350

REVISION_ID is a utf8-encoded revision id

1670

FLAGS is a comma separated list of flags about the record. Values include

2351

FLAGS is a comma separated list of flags about the record. Values include

1671

2352

no-eol, line-delta, fulltext.

1672

2353

BYTE_OFFSET is the ascii representation of the byte offset in the data file

1673

2354

that the the compressed data starts at.

1677

2358

PARENT_SEQUENCE_ID the ascii representation of the sequence number of a

1678

2359

revision id already in the knit that is a parent of REVISION_ID.

1679

2360

The ' :' marker is the end of record marker.

1680

2361

1681

2362

partial writes:

1682

2363

when a write is interrupted to the index file, it will result in a line

1683

2364

that does not end in ' :'. If the ' :' is not present at the end of a line,

1708

2389

self._reset_cache()

1709

2390

self.has_graph = True

1710

2391

1711

def add_records(self, records, random_id=False):

2392

def add_records(self, records, random_id=False, missing_compression_parents=False):

1712

2393

"""Add multiple records to the index.

1713

2394

1714

2395

:param records: a list of tuples:

1715

2396

(key, options, access_memo, parents).

1716

2397

:param random_id: If True the ids being added were randomly generated

1717

2398

and no check for existence will be performed.

2399

:param missing_compression_parents: If True the records being added are

2400

only compressed against texts already in the index (or inside

2401

records). If False the records all refer to unavailable texts (or

2402

texts inside records) as compression parents.

1718

2403

"""

2404

if missing_compression_parents:

2405

# It might be nice to get the edge of the records. But keys isn't

2406

# _wrong_.

2407

keys = sorted(record[0] for record in records)

2408

raise errors.RevisionNotPresent(keys, self)

1719

2409

paths = {}

1720

2410

for record in records:

1721

2411

key = record[0]

1738

2428

line = "\n%s %s %s %s %s :" % (

1739

2429

key[-1], ','.join(options), pos, size,

1740

2430

self._dictionary_compress(parents))

1741

if type(line) != str:

2431

if type(line) is not str:

1742

2432

raise AssertionError(

1743

2433

'data must be utf8 was %s' % type(line))

1744

2434

lines.append(line)

1752

2442

self._kndx_cache[prefix] = (orig_cache, orig_history)

1753

2443

raise

1754

2444

2445

def scan_unvalidated_index(self, graph_index):
    """See _KnitGraphIndex.scan_unvalidated_index.

    Always raises NotImplementedError: kndx files do not support atomic
    insertion via separate index files, so this operation is unavailable.
    """
    unsupported = self.scan_unvalidated_index
    raise NotImplementedError(unsupported)

2450

2451

def get_missing_compression_parents(self):
    """See _KnitGraphIndex.get_missing_compression_parents.

    Always raises NotImplementedError: kndx files do not support atomic
    insertion via separate index files, so this operation is unavailable.
    """
    unsupported = self.get_missing_compression_parents
    raise NotImplementedError(unsupported)

2456

1755

2457

def _cache_key(self, key, options, pos, size, parent_keys):

1756

2458

"""Cache a version record in the history array and index cache.

1757

2459

1825

2527

extra information about the content which needs to be passed to

1826

2528

Factory.parse_record

1827

2529

"""

1828

prefixes = self._partition_keys(keys)

1829

2530

parent_map = self.get_parent_map(keys)

1830

2531

result = {}

1831

2532

for key in keys:

1865

2566

except KeyError:

1866

2567

raise RevisionNotPresent(key, self)

1867

2568

2569

def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()

    Walk the parents recorded in the kndx cache, starting from keys.

    :param keys: An iterable of key tuples. It is materialised once, so a
        one-shot iterator is handled correctly.
    :return: A tuple (parent_map, missing_keys). parent_map maps every key
        found to a tuple of its parent keys; missing_keys is the set of
        keys visited that have no entry in the cache.
    """
    # keys is needed twice (prefix loading and the initial work list), so
    # take a single snapshot rather than iterating the argument twice.
    pending_keys = list(keys)
    prefixes = set(key[:-1] for key in pending_keys)
    self._load_prefixes(prefixes)
    parent_map = {}
    missing_keys = set()
    # This assumes that keys will not reference parents in a different
    # prefix, which is accurate so far.
    while pending_keys:
        key = pending_keys.pop()
        if key in parent_map:
            continue
        prefix = key[:-1]
        try:
            # Element 4 of a cache entry holds the parent suffixes.
            suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
        except KeyError:
            missing_keys.add(key)
        else:
            parent_keys = tuple([prefix + (suffix,)
                                 for suffix in suffix_parents])
            parent_map[key] = parent_keys
            pending_keys.extend([p for p in parent_keys
                                 if p not in parent_map])
    return parent_map, missing_keys

2595

1868

2596

def get_parent_map(self, keys):

1869

2597

"""Get a map of the parents of keys.

1870

2598

1891

2619

1892

2620

def get_position(self, key):

1893

2621

"""Return details needed to access the version.

1894

2622

1895

2623

:return: a tuple (key, data position, size) to hand to the access

1896

2624

logic to get the record.

1897

2625

"""

1900

2628

entry = self._kndx_cache[prefix][0][suffix]

1901

2629

return key, entry[2], entry[3]

1902

2630

2631

has_key = _mod_index._has_key_from_parent_map

2632

1903

2633

def _init_index(self, path, extra_lines=[]):

1904

2634

"""Initialize an index."""

1905

2635

sio = StringIO()

1914

2644

1915

2645

def keys(self):

1916

2646

"""Get all the keys in the collection.

1917

2647

1918

2648

The keys are not ordered.

1919

2649

"""

1920

2650

result = set()

1921

2651

# Identify all key prefixes.

1922

2652

# XXX: A bit hacky, needs polish.

1923

if type(self._mapper) == ConstantMapper:

2653

if type(self._mapper) is ConstantMapper:

1924

2654

prefixes = [()]

1925

2655

else:

1926

2656

relpaths = set()

1933

2663

for suffix in self._kndx_cache[prefix][1]:

1934

2664

result.add(prefix + (suffix,))

1935

2665

return result

1936

2666

1937

2667

def _load_prefixes(self, prefixes):

1938

2668

"""Load the indices for prefixes."""

1939

2669

self._check_read()

1958

2688

del self._history

1959

2689

except NoSuchFile:

1960

2690

self._kndx_cache[prefix] = ({}, [])

1961

if type(self._mapper) == ConstantMapper:

2691

if type(self._mapper) is ConstantMapper:

1962

2692

# preserve behaviour for revisions.kndx etc.

1963

2693

self._init_index(path)

1964

2694

del self._cache

1965

2695

del self._filename

1966

2696

del self._history

1967

2697

2698

missing_keys = _mod_index._missing_keys_from_parent_map

2699

1968

2700

def _partition_keys(self, keys):

1969

2701

"""Turn keys into a dict of prefix:suffix_list."""

1970

2702

result = {}

1975

2707

1976

2708

def _dictionary_compress(self, keys):

1977

2709

"""Dictionary compress keys.

1978

2710

1979

2711

:param keys: The keys to generate references to.

1980

2712

:return: A string representation of keys. keys which are present are

1981

2713

dictionary compressed, and others are emitted as fulltext with a

2009

2741

else:

2010

2742

self._mode = 'r'

2011

2743

2744

def _sort_keys_by_io(self, keys, positions):
    """Order keys so that their records can be read efficiently.

    The keys are grouped by prefix and, within a prefix, ordered by
    position.

    :param keys: A list of keys whose records we want to read. The list is
        sorted in place.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    def io_order(key):
        # positions[key][1] is the index memo: (key, position, size). The
        # memo's key is (file_id, revision_id); only its prefix takes part
        # in the ordering, never the revision_id, and the size is ignored.
        memo_key, position = positions[key][1][:2]
        return memo_key[:-1], position

    keys.sort(key=io_order)

2763

2764

_get_total_build_size = _get_total_build_size

2765

2012

2766

def _split_key(self, key):
    """Return (prefix, suffix) for key.

    The suffix is the last element of key; the prefix is everything
    before it.
    """
    prefix = key[:-1]
    suffix = key[-1]
    return prefix, suffix

2015

2769

2016

2770

2771

class _KeyRefs(object):
    """Track references to keys that have not been added yet."""

    def __init__(self):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}

    def add_references(self, key, refs):
        """Record that key refers to each key in refs, then add key itself."""
        for referenced in refs:
            self.refs.setdefault(referenced, set()).add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_unsatisfied_refs(self):
        """Return an iterator over the referenced keys still being tracked."""
        return iter(self.refs)

    def add_key(self, key):
        """Stop tracking key as a referenced key."""
        # No keys may have depended on this key. That's ok.
        self.refs.pop(key, None)

    def add_keys(self, keys):
        """Call add_key for every key in keys."""
        for key in keys:
            self.add_key(key)

    def get_referrers(self):
        """Return the set of all keys that refer to a tracked key."""
        all_referrers = set()
        for needed_by in self.refs.values():
            all_referrers.update(needed_by)
        return all_referrers

2807

2808

2017

2809

class _KnitGraphIndex(object):

2018

2810

"""A KnitVersionedFiles index layered on GraphIndex."""

2019

2811

2020

2812

def __init__(self, graph_index, is_locked, deltas=False, parents=True,

2021

add_callback=None):

2813

add_callback=None, track_external_parent_refs=False):

2022

2814

"""Construct a KnitGraphIndex on a graph_index.

2023

2815

2024

2816

:param graph_index: An implementation of bzrlib.index.GraphIndex.

2025

2817

:param is_locked: A callback to check whether the object should answer

2026

2818

queries.

2027

2819

:param deltas: Allow delta-compressed records.

2028

:param parents: If True, record knits parents, if not do not record

2820

:param parents: If True, record knits parents, if not do not record

2029

2821

parents.

2030

2822

:param add_callback: If not None, allow additions to the index and call

2031

2823

this callback with a list of added GraphIndex nodes:

2032

2824

[(node, value, node_refs), ...]

2033

2825

:param is_locked: A callback, returns True if the index is locked and

2034

2826

thus usable.

2827

:param track_external_parent_refs: If True, record all external parent

2828

references parents from added records. These can be retrieved

2829

later by calling get_missing_parents().

2035

2830

"""

2036

2831

self._add_callback = add_callback

2037

2832

self._graph_index = graph_index

2044

2839

"parent tracking.")

2045

2840

self.has_graph = parents

2046

2841

self._is_locked = is_locked

2842

self._missing_compression_parents = set()

2843

if track_external_parent_refs:

2844

self._key_dependencies = _KeyRefs()

2845

else:

2846

self._key_dependencies = None

2047

2847

2048

2848

def __repr__(self):
    """Return the class name followed by the repr of the graph index."""
    class_name = self.__class__.__name__
    return "%s(%r)" % (class_name, self._graph_index)

2050

2850

2051

def add_records(self, records, random_id=False):

2851

def add_records(self, records, random_id=False,

2852

missing_compression_parents=False):

2052

2853

"""Add multiple records to the index.

2053

2854

2054

2855

This function does not insert data into the Immutable GraphIndex

2055

2856

backing the KnitGraphIndex, instead it prepares data for insertion by

2056

2857

the caller and checks that it is safe to insert then calls

2060

2861

(key, options, access_memo, parents).

2061

2862

:param random_id: If True the ids being added were randomly generated

2062

2863

and no check for existence will be performed.

2864

:param missing_compression_parents: If True the records being added are

2865

only compressed against texts already in the index (or inside

2866

records). If False the records all refer to unavailable texts (or

2867

texts inside records) as compression parents.

2063

2868

"""

2064

2869

if not self._add_callback:

2065

2870

raise errors.ReadOnlyError(self)

2067

2872

# anymore.

2068

2873

2069

2874

keys = {}

2875

compression_parents = set()

2876

key_dependencies = self._key_dependencies

2070

2877

for (key, options, access_memo, parents) in records:

2071

2878

if self._parents:

2072

2879

parents = tuple(parents)

2880

if key_dependencies is not None:

2881

key_dependencies.add_references(key, parents)

2073

2882

index, pos, size = access_memo

2074

2883

if 'no-eol' in options:

2075

2884

value = 'N'

2083

2892

if self._deltas:

2084

2893

if 'line-delta' in options:

2085

2894

node_refs = (parents, (parents[0],))

2895

if missing_compression_parents:

2896

compression_parents.add(parents[0])

2086

2897

else:

2087

2898

node_refs = (parents, ())

2088

2899

else:

2098

2909

present_nodes = self._get_entries(keys)

2099

2910

for (index, key, value, node_refs) in present_nodes:

2100

2911

if (value[0] != keys[key][0][0] or

2101

node_refs != keys[key][1]):

2912

node_refs[:1] != keys[key][1][:1]):

2102

2913

raise KnitCorrupt(self, "inconsistent details in add_records"

2103

2914

": %s %s" % ((value, node_refs), keys[key]))

2104

2915

del keys[key]

2110

2921

for key, (value, node_refs) in keys.iteritems():

2111

2922

result.append((key, value))

2112

2923

self._add_callback(result)

2113

2924

if missing_compression_parents:

2925

# This may appear to be incorrect (it does not check for

2926

# compression parents that are in the existing graph index),

2927

# but such records won't have been buffered, so this is

2928

# actually correct: every entry when

2929

# missing_compression_parents==True either has a missing parent, or

2930

# a parent that is one of the keys in records.

2931

compression_parents.difference_update(keys)

2932

self._missing_compression_parents.update(compression_parents)

2933

# Adding records may have satisfied missing compression parents.

2934

self._missing_compression_parents.difference_update(keys)

2935

2936

def scan_unvalidated_index(self, graph_index):
    """Inform this _KnitGraphIndex that there is an unvalidated index.

    This lets the index keep track of any missing compression parents that
    may need to be filled in to make such indices valid.

    :param graph_index: A GraphIndex
    """
    if self._deltas:
        # External references in ref list 1, minus any this index can
        # already resolve, are remembered as missing.
        unresolved = graph_index.external_references(ref_list_num=1)
        already_present = self.get_parent_map(unresolved)
        unresolved.difference_update(already_present)
        self._missing_compression_parents.update(unresolved)
    key_dependencies = self._key_dependencies
    if key_dependencies is None:
        return
    # Add parent refs from graph_index (and discard parent refs that
    # the graph_index has).
    for node in graph_index.iter_all_entries():
        key_dependencies.add_references(node[1], node[3][0])

2954

2955

def get_missing_compression_parents(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: A frozenset snapshot of the currently missing keys.
    """
    missing = self._missing_compression_parents
    return frozenset(missing)

2962

2963

def get_missing_parents(self):
    """Return the keys of missing parents.

    :return: A frozenset of the references that remain unsatisfied after
        those present in this index have been discarded.
    """
    # If updating this, you should also update
    # groupcompress._GCGraphIndex.get_missing_parents
    key_dependencies = self._key_dependencies
    # We may have false positives, so filter those out.
    candidates = key_dependencies.get_unsatisfied_refs()
    present = self.get_parent_map(candidates)
    key_dependencies.add_keys(present)
    return frozenset(key_dependencies.get_unsatisfied_refs())

2971

2114

2972

def _check_read(self):

2115

2973

"""raise if reads are not permitted."""

2116

2974

if not self._is_locked():

2176

3034

2177

3035

def _get_entries(self, keys, check_present=False):

2178

3036

"""Get the entries for keys.

2179

3037

2180

3038

:param keys: An iterable of index key tuples.

2181

3039

"""

2182

3040

keys = set(keys)

2224

3082

options.append('no-eol')

2225

3083

return options

2226

3084

3085

def find_ancestry(self, keys):
    """See CombinedGraphIndex.find_ancestry()"""
    # Delegate to the underlying graph index, passing 0 as its second
    # argument.
    graph_index = self._graph_index
    return graph_index.find_ancestry(keys, 0)

3088

2227

3089

def get_parent_map(self, keys):

2228

3090

"""Get a map of the parents of keys.

2229

3091

2244

3106

2245

3107

def get_position(self, key):
    """Return details needed to access the version.

    :param key: The key whose record is to be located.
    :return: a tuple (index, data position, size) to hand to the access
        logic to get the record.
    """
    return self._node_to_position(self._get_node(key))

2253

3115

3116

has_key = _mod_index._has_key_from_parent_map

3117

2254

3118

def keys(self):
    """Get all the keys in the collection.

    The keys are not ordered.

    :return: A list holding element 1 of every entry in the graph index.
    """
    self._check_read()
    found = []
    for node in self._graph_index.iter_all_entries():
        found.append(node[1])
    return found

2261

3125

3126

missing_keys = _mod_index._missing_keys_from_parent_map

3127

2262

3128

def _node_to_position(self, node):
    """Convert an index value to position details.

    :param node: An index node; node[0] is returned unchanged and node[2]
        is the value string that is parsed.
    :return: A tuple (node[0], position, size), the last two as ints.
    """
    # Drop the first character of the value, then read the first two
    # space-separated fields.
    fields = node[2][1:].split(' ')
    position = int(fields[0])
    size = int(fields[1])
    return node[0], position, size

2266

3132

3133

def _sort_keys_by_io(self, keys, positions):
    """Order keys so that their records can be read efficiently.

    The keys are grouped by index and, within an index, ordered by
    position.

    :param keys: A list of keys whose records we want to read. The list is
        sorted in place.
    :param positions: A dict, such as the one returned by
        _get_components_positions()
    :return: None
    """
    # The index memo lives at offset [1] and is (GraphIndex, position,
    # size). The GraphIndex object is unique per pack file, so sorting on
    # the memo groups by pack file and then by position. Size doesn't
    # matter, but it isn't worth breaking up the tuple.
    keys.sort(key=lambda key: positions[key][1])

3152

3153

_get_total_build_size = _get_total_build_size

3154

2267

3155

2268

3156

class _KnitKeyAccess(object):

2269

3157

"""Access to records in .knit files."""

2290

3178

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

2291

3179

length), where the key is the record key.

2292

3180

"""

2293

if type(raw_data) != str:

3181

if type(raw_data) is not str:

2294

3182

raise AssertionError(

2295

3183

'data must be plain bytes was %s' % type(raw_data))

2296

3184

result = []

2313

3201

result.append((key, base, size))

2314

3202

return result

2315

3203

3204

def flush(self):
    """Flush pending writes on this access object.

    For .knit files this is a no-op.
    """
    return None

3210

2316

3211

def get_raw_records(self, memos_for_retrieval):

2317

3212

"""Get the raw bytes for a records.

2318

3213

2343

3238

class _DirectPackAccess(object):

2344

3239

"""Access to data in one or more packs with less translation."""

2345

3240

2346

def __init__(self, index_to_packs):

3241

def __init__(self, index_to_packs, reload_func=None, flush_func=None):
    """Create a _DirectPackAccess object.

    :param index_to_packs: A dict mapping index objects to the transport
        and file names for obtaining data.
    :param reload_func: A function to call if we determine that the pack
        files have moved and we need to reload our caches. See
        bzrlib.repofmt.pack_repo.AggregateIndex for more details.
    :param flush_func: A function called with no arguments by flush(), or
        None if flushing needs no action.
    """
    self._indices = index_to_packs
    self._reload_func = reload_func
    self._flush_func = flush_func
    # No writer is attached at construction time.
    self._container_writer = None
    self._write_index = None

2355

3255

2356

3256

def add_raw_records(self, key_sizes, raw_data):

2357

3257

"""Add raw knit bytes to a storage area.

2367

3267

length), where the index field is the write_index object supplied

2368

3268

to the PackAccess object.

2369

3269

"""

2370

if type(raw_data) != str:

3270

if type(raw_data) is not str:

2371

3271

raise AssertionError(

2372

3272

'data must be plain bytes was %s' % type(raw_data))

2373

3273

result = []

2379

3279

result.append((self._write_index, p_offset, p_length))

2380

3280

return result

2381

3281

3282

def flush(self):
    """Flush pending writes on this access object.

    This will flush any buffered writes to a NewPack.
    """
    flush_func = self._flush_func
    if flush_func is None:
        # Nothing was registered, so there is nothing to flush.
        return
    flush_func()

3289

2382

3290

def get_raw_records(self, memos_for_retrieval):

2383

3291

"""Get the raw bytes for a records.

2384

3292

2385

:param memos_for_retrieval: An iterable containing the (index, pos,

3293

:param memos_for_retrieval: An iterable containing the (index, pos,

2386

3294

length) memo for retrieving the bytes. The Pack access method

2387

3295

looks up the pack to use for a given record in its index_to_pack

2388

3296

map.

2403

3311

if current_index is not None:

2404

3312

request_lists.append((current_index, current_list))

2405

3313

for index, offsets in request_lists:

2406

transport, path = self._indices[index]

2407

reader = pack.make_readv_reader(transport, path, offsets)

2408

for names, read_func in reader.iter_records():

2409

yield read_func(None)

3314

try:

3315

transport, path = self._indices[index]

3316

except KeyError:

3317

# A KeyError here indicates that someone has triggered an index

3318

# reload, and this index has gone missing, we need to start

3319

# over.

3320

if self._reload_func is None:

3321

# If we don't have a _reload_func there is nothing that can

3322

# be done

3323

raise

3324

raise errors.RetryWithNewPacks(index,

3325

reload_occurred=True,

3326

exc_info=sys.exc_info())

3327

try:

3328

reader = pack.make_readv_reader(transport, path, offsets)

3329

for names, read_func in reader.iter_records():

3330

yield read_func(None)

3331

except errors.NoSuchFile:

3332

# A NoSuchFile error indicates that a pack file has gone

3333

# missing on disk, we need to trigger a reload, and start over.

3334

if self._reload_func is None:

3335

raise

3336

raise errors.RetryWithNewPacks(transport.abspath(path),

3337

reload_occurred=False,

3338

exc_info=sys.exc_info())

2410

3339

2411

3340

def set_writer(self, writer, index, transport_packname):

2412

3341

"""Set a writer to use for adding data."""

2415

3344

self._container_writer = writer

2416

3345

self._write_index = index

2417

3346

3347

def reload_or_raise(self, retry_exc):

3348

"""Try calling the reload function, or re-raise the original exception.

3349

3350

This should be called after _DirectPackAccess raises a

3351

RetryWithNewPacks exception. This function will handle the common logic

3352

of determining when the error is fatal versus being temporary.

3353

It will also make sure that the original exception is raised, rather

3354

than the RetryWithNewPacks exception.

3355

3356

If this function returns, then the calling function should retry

3357

whatever operation was being performed. Otherwise an exception will

3358

be raised.

3359

3360

:param retry_exc: A RetryWithNewPacks exception.

3361

"""

3362

is_error = False

3363

if self._reload_func is None:

3364

is_error = True

3365

elif not self._reload_func():

3366

# The reload claimed that nothing changed

3367

if not retry_exc.reload_occurred:

3368

# If there wasn't an earlier reload, then we really were

3369

# expecting to find changes. We didn't find them, so this is a

3370

# hard error

3371

is_error = True

3372

if is_error:

3373

exc_class, exc_value, exc_traceback = retry_exc.exc_info

3374

raise exc_class, exc_value, exc_traceback

3375

2418

3376

2419

3377

# Deprecated, use PatienceSequenceMatcher instead

2420

3378

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2428

3386

recommended.

2429

3387

"""

2430

3388

annotator = _KnitAnnotator(knit)

2431

return iter(annotator.annotate(revision_id))

2432

2433

2434

class _KnitAnnotator(object):

3389

return iter(annotator.annotate_flat(revision_id))

3390

3391

3392

class _KnitAnnotator(annotate.Annotator):

2435

3393

"""Build up the annotations for a text."""

2436

3394

2437

def __init__(self, knit):

2438

self._knit = knit

2439

2440

# Content objects, differs from fulltexts because of how final newlines

2441

# are treated by knits. the content objects here will always have a

2442

# final newline

2443

self._fulltext_contents = {}

2444

2445

# Annotated lines of specific revisions

2446

self._annotated_lines = {}

2447

2448

# Track the raw data for nodes that we could not process yet.

2449

# This maps the revision_id of the base to a list of children that will

2450

# annotated from it.

2451

self._pending_children = {}

2452

2453

# Nodes which cannot be extracted

2454

self._ghosts = set()

2455

2456

# Track how many children this node has, so we know if we need to keep

2457

# it

2458

self._annotate_children = {}

2459

self._compression_children = {}

3395

def __init__(self, vf):

3396

annotate.Annotator.__init__(self, vf)

3397

3398

# TODO: handle Nodes which cannot be extracted

3399

# self._ghosts = set()

3400

3401

# Map from (key, parent_key) => matching_blocks, should be 'use once'

3402

self._matching_blocks = {}

3403

3404

# KnitContent objects

3405

self._content_objects = {}

3406

# The number of children that depend on this fulltext content object

3407

self._num_compression_children = {}

3408

# Delta records that need their compression parent before they can be

3409

# expanded

3410

self._pending_deltas = {}

3411

# Fulltext records that are waiting for their parents fulltexts before

3412

# they can be yielded for annotation

3413

self._pending_annotation = {}

2460

3414

2461

3415

self._all_build_details = {}

2462

# The children => parent revision_id graph

2463

self._revision_id_graph = {}

2464

2465

self._heads_provider = None

2466

2467

self._nodes_to_keep_annotations = set()

2468

self._generations_until_keep = 100

2469

2470

def set_generations_until_keep(self, value):

2471

"""Set the number of generations before caching a node.

2472

2473

Setting this to -1 will cache every merge node, setting this higher

2474

will cache fewer nodes.

2475

"""

2476

self._generations_until_keep = value

2477

2478

def _add_fulltext_content(self, revision_id, content_obj):

2479

self._fulltext_contents[revision_id] = content_obj

2480

# TODO: jam 20080305 It might be good to check the sha1digest here

2481

return content_obj.text()

2482

2483

def _check_parents(self, child, nodes_to_annotate):

2484

"""Check if all parents have been processed.

2485

2486

:param child: A tuple of (rev_id, parents, raw_content)

2487

:param nodes_to_annotate: If child is ready, add it to

2488

nodes_to_annotate, otherwise put it back in self._pending_children

2489

"""

2490

for parent_id in child[1]:

2491

if (parent_id not in self._annotated_lines):

2492

# This parent is present, but another parent is missing

2493

self._pending_children.setdefault(parent_id,

2494

[]).append(child)

2495

break

2496

else:

2497

# This one is ready to be processed

2498

nodes_to_annotate.append(child)

2499

2500

def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Annotate one text and release whatever was waiting on it.

    Every entry of parent_ids must already be in self._annotated_lines.

    :param revision_id: The key being annotated.
    :param fulltext: The text passed on to annotate.reannotate.
    :param parent_ids: Parents whose annotated lines feed the reannotation.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    annotated = self._annotated_lines
    parent_annotations = [annotated[parent] for parent in parent_ids]
    heads_provider = self._get_heads_provider()
    self._annotated_lines[revision_id] = list(
        annotate.reannotate(parent_annotations, fulltext, revision_id,
                            left_matching_blocks,
                            heads_provider=heads_provider))
    for parent in parent_ids:
        remaining_children = self._annotate_children[parent]
        remaining_children.remove(revision_id)
        if remaining_children:
            continue
        if parent in self._nodes_to_keep_annotations:
            continue
        # No child still needs this parent and it is not marked to be
        # kept, so drop its cached data.
        del self._annotated_lines[parent]
        del self._all_build_details[parent]
        if parent in self._fulltext_contents:
            del self._fulltext_contents[parent]
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    released = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, released)
    return released

2528

3416

2529

3417

def _get_build_graph(self, key):

2530

3418

"""Get the graphs for building texts and annotations.

2535

3423

fulltext.)

2536

3424

2537

3425

:return: A list of (key, index_memo) records, suitable for

2538

passing to read_records_iter to start reading in the raw data fro/

3426

passing to read_records_iter to start reading in the raw data from

2539

3427

the pack file.

2540

3428

"""

2541

if key in self._annotated_lines:

2542

# Nothing to do

2543

return []

2544

3429

pending = set([key])

2545

3430

records = []

2546

generation = 0

2547

kept_generation = 0

3431

ann_keys = set()

3432

self._num_needed_children[key] = 1

2548

3433

while pending:

2549

3434

# get all pending nodes

2550

generation += 1

2551

3435

this_iteration = pending

2552

build_details = self._knit._index.get_build_details(this_iteration)

3436

build_details = self._vf._index.get_build_details(this_iteration)

2553

3437

self._all_build_details.update(build_details)

2554

# new_nodes = self._knit._index._get_entries(this_iteration)

3438

# new_nodes = self._vf._index._get_entries(this_iteration)

2555

3439

pending = set()

2556

3440

for key, details in build_details.iteritems():

2557

(index_memo, compression_parent, parents,

3441

(index_memo, compression_parent, parent_keys,

2558

3442

record_details) = details

2559

self._revision_id_graph[key] = parents

3443

self._parent_map[key] = parent_keys

3444

self._heads_provider = None

2560

3445

records.append((key, index_memo))

2561

3446

# Do we actually need to check _annotated_lines?

2562

pending.update(p for p in parents

2563

if p not in self._all_build_details)

3447

pending.update([p for p in parent_keys

3448

if p not in self._all_build_details])

3449

if parent_keys:

3450

for parent_key in parent_keys:

3451

if parent_key in self._num_needed_children:

3452

self._num_needed_children[parent_key] += 1

3453

else:

3454

self._num_needed_children[parent_key] = 1

2564

3455

if compression_parent:

2565

self._compression_children.setdefault(compression_parent,

2566

[]).append(key)

2567

if parents:

2568

for parent in parents:

2569

self._annotate_children.setdefault(parent,

2570

[]).append(key)

2571

num_gens = generation - kept_generation

2572

if ((num_gens >= self._generations_until_keep)

2573

and len(parents) > 1):

2574

kept_generation = generation

2575

self._nodes_to_keep_annotations.add(key)

3456

if compression_parent in self._num_compression_children:

3457

self._num_compression_children[compression_parent] += 1

3458

else:

3459

self._num_compression_children[compression_parent] = 1

2576

3460

2577

3461

missing_versions = this_iteration.difference(build_details.keys())

2578

self._ghosts.update(missing_versions)

2579

for missing_version in missing_versions:

2580

# add a key, no parents

2581

self._revision_id_graph[missing_version] = ()

2582

pending.discard(missing_version) # don't look for it

2583

if self._ghosts.intersection(self._compression_children):

2584

raise KnitCorrupt(

2585

"We cannot have nodes which have a ghost compression parent:\n"

2586

"ghosts: %r\n"

2587

"compression children: %r"

2588

% (self._ghosts, self._compression_children))

2589

# Cleanout anything that depends on a ghost so that we don't wait for

2590

# the ghost to show up

2591

for node in self._ghosts:

2592

if node in self._annotate_children:

2593

# We won't be building this node

2594

del self._annotate_children[node]

3462

if missing_versions:

3463

for key in missing_versions:

3464

if key in self._parent_map and key in self._text_cache:

3465

# We already have this text ready, we just need to

3466

# yield it later so we get it annotated

3467

ann_keys.add(key)

3468

parent_keys = self._parent_map[key]

3469

for parent_key in parent_keys:

3470

if parent_key in self._num_needed_children:

3471

self._num_needed_children[parent_key] += 1

3472

else:

3473

self._num_needed_children[parent_key] = 1

3474

pending.update([p for p in parent_keys

3475

if p not in self._all_build_details])

3476

else:

3477

raise errors.RevisionNotPresent(key, self._vf)

2595

3478

# Generally we will want to read the records in reverse order, because

2596

3479

# we find the parent nodes after the children

2597

3480

records.reverse()

2598

return records

2599

2600

def _annotate_records(self, records):

2601

"""Build the annotations for the listed records."""

3481

return records, ann_keys

3482

3483

def _get_needed_texts(self, key, pb=None):
    """Yield (key, text, num_lines) for the texts needed to annotate key.

    When self._vf has fallback versioned files, this delegates to
    annotate.Annotator._get_needed_texts. Otherwise it builds the record
    list with self._get_build_graph and yields what self._extract_texts
    produces, followed by the already-cached texts named in ann_keys.

    :param key: The key whose build graph is requested.
    :param pb: Optional progress object; its update() is called once per
        extracted text.
    """
    # if True or len(self._vf._fallback_vfs) > 0:
    if len(self._vf._fallback_vfs) > 0:
        # If we have fallbacks, go to the generic path
        for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):
            yield v
        return
    # Loop so that the whole extraction is restarted after a successful
    # reload_or_raise(); the 'return' inside the try ends the generator.
    while True:
        try:
            records, ann_keys = self._get_build_graph(key)
            for idx, (sub_key, text, num_lines) in enumerate(
                                            self._extract_texts(records)):
                if pb is not None:
                    pb.update('annotating', idx, len(records))
                yield sub_key, text, num_lines
            # These keys had no build details but their text is in
            # self._text_cache, so they are yielded from the cache.
            for sub_key in ann_keys:
                text = self._text_cache[sub_key]
                num_lines = len(text) # bad assumption
                yield sub_key, text, num_lines
            return
        except errors.RetryWithNewPacks, e:
            self._vf._access.reload_or_raise(e)
            # The cached build_details are no longer valid
            self._all_build_details.clear()

3507

3508

def _cache_delta_blocks(self, key, compression_parent, delta, lines):
    """Store the matching blocks between key and its compression parent.

    The blocks come from KnitContent.get_line_delta_blocks and are saved
    in self._matching_blocks under (key, compression_parent).
    """
    basis_lines = self._text_cache[compression_parent]
    block_iter = KnitContent.get_line_delta_blocks(delta, basis_lines, lines)
    self._matching_blocks[(key, compression_parent)] = list(block_iter)

3512

3513

def _expand_record(self, key, parent_keys, compression_parent, record,

3514

record_details):

3515

delta = None

3516

if compression_parent:

3517

if compression_parent not in self._content_objects:

3518

# Waiting for the parent

3519

self._pending_deltas.setdefault(compression_parent, []).append(

3520

(key, parent_keys, record, record_details))

3521

return None

3522

# We have the basis parent, so expand the delta

3523

num = self._num_compression_children[compression_parent]

3524

num -= 1

3525

if num == 0:

3526

base_content = self._content_objects.pop(compression_parent)

3527

self._num_compression_children.pop(compression_parent)

3528

else:

3529

self._num_compression_children[compression_parent] = num

3530

base_content = self._content_objects[compression_parent]

3531

# It is tempting to want to copy_base_content=False for the last

3532

# child object. However, whenever noeol=False,

3533

# self._text_cache[parent_key] is content._lines. So mutating it

3534

# gives very bad results.

3535

# The alternative is to copy the lines into text cache, but then we

3536

# are copying anyway, so just do it here.

3537

content, delta = self._vf._factory.parse_record(

3538

key, record, record_details, base_content,

3539

copy_base_content=True)

3540

else:

3541

# Fulltext record

3542

content, _ = self._vf._factory.parse_record(

3543

key, record, record_details, None)

3544

if self._num_compression_children.get(key, 0) > 0:

3545

self._content_objects[key] = content

3546

lines = content.text()

3547

self._text_cache[key] = lines

3548

if delta is not None:

3549

self._cache_delta_blocks(key, compression_parent, delta, lines)

3550

return lines

3551

3552

def _get_parent_annotations_and_matches(self, key, text, parent_key):

3553

"""Get the list of annotations for the parent, and the matching lines.

3554

3555

:param text: The opaque value given by _get_needed_texts

3556

:param parent_key: The key for the parent text

3557

:return: (parent_annotations, matching_blocks)

3558

parent_annotations is a list as long as the number of lines in

3559

parent

3560

matching_blocks is a list of (parent_idx, text_idx, len) tuples

3561

indicating which lines match between the two texts

3562

"""

3563

block_key = (key, parent_key)

3564

if block_key in self._matching_blocks:

3565

blocks = self._matching_blocks.pop(block_key)

3566

parent_annotations = self._annotations_cache[parent_key]

3567

return parent_annotations, blocks

3568

return annotate.Annotator._get_parent_annotations_and_matches(self,

3569

key, text, parent_key)

3570

3571

def _process_pending(self, key):

3572

"""The content for 'key' was just processed.

3573

3574

Determine if there is any more pending work to be processed.

3575

"""

3576

to_return = []

3577

if key in self._pending_deltas:

3578

compression_parent = key

3579

children = self._pending_deltas.pop(key)

3580

for child_key, parent_keys, record, record_details in children:

3581

lines = self._expand_record(child_key, parent_keys,

3582

compression_parent,

3583

record, record_details)

3584

if self._check_ready_for_annotations(child_key, parent_keys):

3585

to_return.append(child_key)

3586

# Also check any children that are waiting for this parent to be

3587

# annotation ready

3588

if key in self._pending_annotation:

3589

children = self._pending_annotation.pop(key)

3590

to_return.extend([c for c, p_keys in children

3591

if self._check_ready_for_annotations(c, p_keys)])

3592

return to_return

3593

3594

def _check_ready_for_annotations(self, key, parent_keys):

3595

"""return true if this text is ready to be yielded.

3596

3597

Otherwise, this will return False, and queue the text into

3598

self._pending_annotation

3599

"""

3600

for parent_key in parent_keys:

3601

if parent_key not in self._annotations_cache:

3602

# still waiting on at least one parent text, so queue it up

3603

# Note that if there are multiple parents, we need to wait

3604

# for all of them.

3605

self._pending_annotation.setdefault(parent_key,

3606

[]).append((key, parent_keys))

3607

return False

3608

return True

3609

3610

def _extract_texts(self, records):

3611

"""Extract the various texts needed based on records"""

2602

3612

# We iterate in the order read, rather than a strict order requested

2603

3613

# However, process what we can, and put off to the side things that

2604

3614

# still need parents, cleaning them up when those parents are

2605

3615

# processed.

2606

for (rev_id, record,

2607

digest) in self._knit._read_records_iter(records):

2608

if rev_id in self._annotated_lines:

3616

# Basic data flow:

3617

# 1) As 'records' are read, see if we can expand these records into

3618

# Content objects (and thus lines)

3619

# 2) If a given line-delta is waiting on its compression parent, it

3620

# gets queued up into self._pending_deltas, otherwise we expand

3621

# it, and put it into self._text_cache and self._content_objects

3622

# 3) If we expanded the text, we will then check to see if all

3623

# parents have also been processed. If so, this text gets yielded,

3624

# else this record gets set aside into pending_annotation

3625

# 4) Further, if we expanded the text in (2), we will then check to

3626

# see if there are any children in self._pending_deltas waiting to

3627

# also be processed. If so, we go back to (2) for those

3628

# 5) Further again, if we yielded the text, we can then check if that

3629

# 'unlocks' any of the texts in pending_annotations, which should

3630

# then get yielded as well

3631

# Note that both steps 4 and 5 are 'recursive' in that unlocking one

3632

# compression child could unlock yet another, and yielding a fulltext

3633

# will also 'unlock' the children that are waiting on that annotation.

3634

# (Though also, unlocking 1 parent's fulltext, does not unlock a child

3635

# if other parents are also waiting.)

3636

# We want to yield content before expanding child content objects, so

3637

# that we know when we can re-use the content lines, and the annotation

3638

# code can know when it can stop caching fulltexts, as well.

3639

3640

# Children that are missing their compression parent

3641

pending_deltas = {}

3642

for (key, record, digest) in self._vf._read_records_iter(records):

3643

# ghosts?

3644

details = self._all_build_details[key]

3645

(_, compression_parent, parent_keys, record_details) = details

3646

lines = self._expand_record(key, parent_keys, compression_parent,

3647

record, record_details)

3648

if lines is None:

3649

# Pending delta should be queued up

2609

3650

continue

2610

parent_ids = self._revision_id_graph[rev_id]

2611

parent_ids = [p for p in parent_ids if p not in self._ghosts]

2612

details = self._all_build_details[rev_id]

2613

(index_memo, compression_parent, parents,

2614

record_details) = details

2615

nodes_to_annotate = []

2616

# TODO: Remove the punning between compression parents, and

2617

# parent_ids, we should be able to do this without assuming

2618

# the build order

2619

if len(parent_ids) == 0:

2620

# There are no parents for this node, so just add it

2621

# TODO: This probably needs to be decoupled

2622

fulltext_content, delta = self._knit._factory.parse_record(

2623

rev_id, record, record_details, None)

2624

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

2625

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

2626

parent_ids, left_matching_blocks=None))

2627

else:

2628

child = (rev_id, parent_ids, record)

2629

# Check if all the parents are present

2630

self._check_parents(child, nodes_to_annotate)

2631

while nodes_to_annotate:

2632

# Should we use a queue here instead of a stack?

2633

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

2634

(index_memo, compression_parent, parents,

2635

record_details) = self._all_build_details[rev_id]

2636

if compression_parent is not None:

2637

comp_children = self._compression_children[compression_parent]

2638

if rev_id not in comp_children:

2639

raise AssertionError("%r not in compression children %r"

2640

% (rev_id, comp_children))

2641

# If there is only 1 child, it is safe to reuse this

2642

# content

2643

reuse_content = (len(comp_children) == 1

2644

and compression_parent not in

2645

self._nodes_to_keep_annotations)

2646

if reuse_content:

2647

# Remove it from the cache since it will be changing

2648

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

2649

# Make sure to copy the fulltext since it might be

2650

# modified

2651

parent_fulltext = list(parent_fulltext_content.text())

2652

else:

2653

parent_fulltext_content = self._fulltext_contents[compression_parent]

2654

parent_fulltext = parent_fulltext_content.text()

2655

comp_children.remove(rev_id)

2656

fulltext_content, delta = self._knit._factory.parse_record(

2657

rev_id, record, record_details,

2658

parent_fulltext_content,

2659

copy_base_content=(not reuse_content))

2660

fulltext = self._add_fulltext_content(rev_id,

2661

fulltext_content)

2662

blocks = KnitContent.get_line_delta_blocks(delta,

2663

parent_fulltext, fulltext)

2664

else:

2665

fulltext_content = self._knit._factory.parse_fulltext(

2666

record, rev_id)

2667

fulltext = self._add_fulltext_content(rev_id,

2668

fulltext_content)

2669

blocks = None

2670

nodes_to_annotate.extend(

2671

self._add_annotation(rev_id, fulltext, parent_ids,

2672

left_matching_blocks=blocks))

2673

2674

def _get_heads_provider(self):

2675

"""Create a heads provider for resolving ancestry issues."""

2676

if self._heads_provider is not None:

2677

return self._heads_provider

2678

parent_provider = _mod_graph.DictParentsProvider(

2679

self._revision_id_graph)

2680

graph_obj = _mod_graph.Graph(parent_provider)

2681

head_cache = _mod_graph.FrozenHeadsCache(graph_obj)

2682

self._heads_provider = head_cache

2683

return head_cache

2684

2685

def annotate(self, key):
    """Return the annotated fulltext at the given key.

    :param key: The key to annotate.
    :return: The result of self._simple_annotate(key); see the note on
        the condition below.
    """
    # The leading 'True or' makes this condition always true, so every
    # call takes the _simple_annotate path and the statements after this
    # 'if' are currently unreachable.
    if True or len(self._knit._fallback_vfs) > 0:
        # stacked knits can't use the fast path at present.
        return self._simple_annotate(key)
    records = self._get_build_graph(key)
    if key in self._ghosts:
        raise errors.RevisionNotPresent(key, self._knit)
    self._annotate_records(records)
    return self._annotated_lines[key]

2698

2699

def _simple_annotate(self, key):
    """Return annotated fulltext, rediffing from the full texts.

    This is slow but makes no assumptions about the repository
    being able to produce line deltas.

    :param key: The key to annotate.
    :return: The annotated lines built for ``key`` by annotate.reannotate.
    :raises errors.RevisionNotPresent: if no annotation was built for
        ``key`` (for example, the record stream did not contain it).
    """
    # TODO: this code generates a parent maps of present ancestors; it
    # could be split out into a separate method, and probably should use
    # iter_ancestry instead. -- mbp and robertc 20080704
    graph = _mod_graph.Graph(self._knit)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    search = graph._make_breadth_first_searcher([key])
    keys = set()
    while True:
        try:
            present, ghosts = search.next_with_ghosts()
        except StopIteration:
            break
        keys.update(present)
    parent_map = self._knit.get_parent_map(keys)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self._knit.get_record_stream(keys, 'topological', True):
        # Use a separate name for the record's key: rebinding 'key' here
        # would make the final lookup return whichever record happened to
        # be streamed last rather than the one that was requested.
        record_key = record.key
        fulltext = split_lines(record.get_bytes_as('fulltext'))
        parents = parent_map[record_key]
        if parents is not None:
            parent_lines = [parent_cache[parent] for parent in parents]
        else:
            parent_lines = []
        parent_cache[record_key] = list(
            reannotate(parent_lines, fulltext, record_key, None, head_cache))
    try:
        return parent_cache[key]
    except KeyError:
        raise errors.RevisionNotPresent(key, self._knit)

2735

3651

# At this point, we may be able to yield this content, if all

3652

# parents are also finished

3653

yield_this_text = self._check_ready_for_annotations(key,

3654

parent_keys)

3655

if yield_this_text:

3656

# All parents present

3657

yield key, lines, len(lines)

3658

to_process = self._process_pending(key)

3659

while to_process:

3660

this_process = to_process

3661

to_process = []

3662

for key in this_process:

3663

lines = self._text_cache[key]

3664

yield key, lines, len(lines)

3665

to_process.extend(self._process_pending(key))

2736

3666

2737

3667

try:

2738

from bzrlib._knit_load_data_c import _load_data_c as _load_data

3668

from bzrlib._knit_load_data_pyx import _load_data_c as _load_data

2739

3669

except ImportError:

2740

3670

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »