~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2007-09-19 13:12:32 UTC
mto: (2592.3.153 repository)
mto: This revision was merged to the branch mainline in revision 2933.
Revision ID: mbp@sourcefrog.net-20070919131232-0gtp1q90fxz10ctn

move commit_write_group to RepositoryPackCollection

files added:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/multiparent.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/emacs

contrib/emacs/bzr-mode.el

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/configuration.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/tutorial.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files renamed:
bzrlib/changeset.py => bzrlib/bundle/__init__.py

plugins/ => bzrlib/plugins/

bzrlib/store.py => bzrlib/store/__init__.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/teststatus.py => bzrlib/tests/blackbox/test_status.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

tools/testweave.py => bzrlib/tests/test_weave.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

effbot/ => bzrlib/util/effbot/

elementtree/ => bzrlib/util/elementtree/

bzrlib/xml.py => bzrlib/xml_serializer.py

bzrlib/newinventory.py => contrib/newinventory.py

bzr-man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_strings,

108

)

109

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

110

from bzrlib.tsort import topo_sort

111

import bzrlib.ui

112

import bzrlib.weave

113

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

114

115

116

# TODO: Split out code specific to this format into an associated object.

117

118

# TODO: Can we put in some kind of value to check that the index and data

119

# files belong together?

120

121

# TODO: accommodate binaries, perhaps by storing a byte count

122

123

# TODO: function to check whole file

124

125

# TODO: atomically append data, then measure backwards from the cursor

126

# position after writing to work out where it was located. we may need to

127

# bypass python file buffering.

128

129

# Suffix strings for knit files. NOTE(review): going by the names and the
# module docstring, '.knit' is the data file and '.kndx' the index file;
# their use is outside this view -- confirm against callers.
DATA_SUFFIX = '.knit'

130

INDEX_SUFFIX = '.kndx'

131

132

133

class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    The methods here rely on ``annotate_iter()``, ``text()`` and a
    ``_lines`` attribute, none of which this class defines itself; they are
    expected to be supplied by subclasses.
    """

    def annotate(self):
        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines.

        :param new_lines: A content object; its ``text()`` result is diffed
            against ``self.text()`` and its ``_lines`` supply the
            replacement data.
        :return: An iterator of ``(old_start, old_end, new_length, data)``
            tuples, one per non-'equal' opcode reported by the patience
            sequence matcher.
        """
        new_texts = new_lines.text()
        old_texts = self.text()
        s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in s.get_opcodes():
            if tag == 'equal':
                continue
            # ofrom, oto, length, data
            yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        """Return the result of line_delta_iter(new_lines) as a list."""
        return list(self.line_delta_iter(new_lines))

    @staticmethod
    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta

        :param knit_delta: An iterable of
            ``(s_begin, s_end, t_len, new_text)`` hunks; ``new_text`` is not
            used here.
        :param source: Indexable sequence of the lines the delta applies to.
        :param target: Indexable sequence of the resulting lines.
        :return: An iterator of ``(source_pos, target_pos, length)`` matching
            blocks, always finishing with a zero-length block.
        """
        target_len = len(target)
        s_pos = 0
        t_pos = 0
        for s_begin, s_end, t_len, new_text in knit_delta:
            # Lines between the previous hunk and this one are unchanged.
            true_n = s_begin - s_pos
            n = true_n
            if n > 0:
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[s_pos + n - 1] != target[t_pos + n - 1]:
                    n -= 1
                if n > 0:
                    yield s_pos, t_pos, n
            # Advance by the full unchanged run (true_n), not the possibly
            # shortened n, so the positions stay aligned with the delta.
            t_pos += t_len + true_n
            s_pos = s_end
        # Whatever is left of the target after the last hunk.
        n = target_len - t_pos
        if n > 0:
            if source[s_pos + n - 1] != target[t_pos + n - 1]:
                n -= 1
            if n > 0:
                yield s_pos, t_pos, n
        # Terminating zero-length block.
        yield s_pos + (target_len - t_pos), target_len, 0

179

180

181

class AnnotatedKnitContent(KnitContent):
    """Content stored as a list of (origin, text) tuples."""

    def __init__(self, lines):
        """Wrap lines, a list of (origin, text) tuples, without copying it."""
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the stored (origin, text) tuples."""
        return iter(self._lines)

    def strip_last_line_newline(self):
        """Replace the last entry with one whose text has trailing '\\n' removed."""
        last = self._lines[-1]
        stripped = last[1].rstrip('\n')
        self._lines[-1] = (last[0], stripped)

    def text(self):
        """Return a new list holding just the text of every line."""
        return [line for _origin, line in self._lines]

    def copy(self):
        """Return a new AnnotatedKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return AnnotatedKnitContent(duplicate)

200

201

202

class PlainKnitContent(KnitContent):
    """Unannotated content.

    When annotate[_iter] is called on this content, the same version is
    reported for all lines. Generally, annotate[_iter] is not useful on
    PlainKnitContent objects.
    """

    def __init__(self, lines, version_id):
        """Wrap lines (not copied) and remember version_id as their origin."""
        self._lines = lines
        self._version_id = version_id

    def annotate_iter(self):
        """Yield (version_id, text) for each content line."""
        for text in self._lines:
            yield self._version_id, text

    def copy(self):
        """Return a new PlainKnitContent over a shallow copy of the lines."""
        duplicate = self._lines[:]
        return PlainKnitContent(duplicate, self._version_id)

    def strip_last_line_newline(self):
        """Remove trailing '\\n' characters from the last line, in place."""
        last = self._lines[-1]
        self._lines[-1] = last.rstrip('\n')

    def text(self):
        """Return the stored list of lines itself (not a copy)."""
        return self._lines

227

228

229

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects."""

    annotated = True

    def make(self, lines, version_id):
        """Return content in which every line is attributed to version_id."""
        return AnnotatedKnitContent([(version_id, line) for line in lines])

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)

        :param content: An iterable of serialized fulltext lines.
        :param version_id: Unused here; the origin is read from each line.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks of parse_line_delta().

        :param version_id: Passed through to parse_line_delta(); optional so
            that existing one-argument callers keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])

        :param lines: An iterable of serialized delta lines.
        :param version_id: Unused here; the origin is read from each line.
        """
        result = []
        lines = iter(lines)
        # walk through the lines parsing: each header is followed by exactly
        # `count` content lines, pulled from the same iterator.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next(lines).split(' ', 1))
                        for _ in range(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        lines = iter(lines)
        for header in lines:
            count = int(header.split(',')[2])
            for _ in range(count):
                origin, text = next(lines).split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotate_iter() of knit's content for version_id."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

327

328

329

class KnitPlainFactory(object):
    """Factory for creating plain Content objects."""

    annotated = False

    def make(self, lines, version_id):
        """Return a PlainKnitContent for lines, attributed to version_id."""
        return PlainKnitContent(lines, version_id)

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - its just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, content_lines) for each hunk in lines.

        :param lines: An indexable, sized sequence of serialized delta lines.
        :param version_id: Unused.
        """
        total = len(lines)
        index = 0
        while index < total:
            start, end, count = [int(field)
                                 for field in lines[index].split(',')]
            body_start = index + 1
            body_end = body_start + count
            yield start, end, count, lines[body_start:body_end]
            index = body_end

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter() as a list."""
        return [hunk for hunk in self.parse_line_delta_iter(lines, version_id)]

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        stream = iter(lines)
        for header in stream:
            fields = header.split(',')
            remaining = int(fields[2])
            while remaining > 0:
                yield next(stream)
                remaining -= 1

    def lower_fulltext(self, content):
        """Return the lines of content, as given by its text() method."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize delta hunks as a header line followed by their lines."""
        serialized = []
        for start, end, count, new_lines in delta:
            serialized.append('%d,%d,%d\n' % (start, end, count))
            serialized.extend(new_lines)
        return serialized

    def annotate_iter(self, knit, version_id):
        """Return the result of annotate_knit() for version_id in knit."""
        return annotate_knit(knit, version_id)

388

389

390

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit lives on.
    :param relpath: The location of the knit relative to transport.
    :return: The newly constructed KnitVersionedFile, opened for writing
        with a plain (unannotated) factory.
    """
    # KnitVersionedFile takes relpath before transport, and expects a
    # factory instance rather than the factory class.
    return KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)

393

394

395

class KnitVersionedFile(VersionedFile):

396

"""Weave-like structure with faster random access.

397

398

A knit stores a number of texts and a summary of the relationships

399

between them. Texts are identified by a string version-id. Texts

400

are normally stored and retrieved as a series of lines, but can

401

also be passed as single strings.

402

403

Lines are stored with the trailing newline (if any) included, to

404

avoid special cases for files with no final newline. Lines are

405

composed of 8-bit characters, not unicode. The combination of

406

these approaches should mean any 'binary' file can be safely

407

stored and retrieved.

408

"""

409

410

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
    factory=None, delta=True, create=False, create_parent_dir=False,
    delay_create=False, dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: The content factory; a KnitAnnotateFactory is used
        when none is given.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.
    :param access_method: An access object to use for the data instead
        of constructing a _KnitAccess.
    """
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    # The index must be in place before len(self) is consulted below.
    if index is None:
        index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    self._index = index

    if access_method is not None:
        access = access_method
    else:
        data_path = relpath + DATA_SUFFIX
        defer_creation = ((create and not len(self)) and delay_create)
        access = _KnitAccess(transport, data_path, file_mode, dir_mode,
            defer_creation, create_parent_dir)
    if create and not len(self) and not delay_create:
        access.create()
    self._data = _KnitData(access)

450

451

def __repr__(self):

452

return '%s(%s)' % (self.__class__.__name__,

453

self.transport.abspath(self.filename))

454

455

def _check_should_delta(self, first_parents):

456

"""Iterate back through the parent listing, looking for a fulltext.

457

458

This is used when we want to decide whether to add a delta or a new

459

fulltext. It searches for _max_delta_chain parents. When it finds a

460

fulltext parent, it sees if the total size of the deltas leading up to

461

it is large enough to indicate that we want a new full text anyway.

462

463

Return True if we should create a new delta, False if we should use a

464

full text.

465

"""

466

delta_size = 0

467

fulltext_size = None

468

delta_parents = first_parents

469

for count in xrange(self._max_delta_chain):

470

parent = delta_parents[0]

471

method = self._index.get_method(parent)

472

index, pos, size = self._index.get_position(parent)

473

if method == 'fulltext':

474

fulltext_size = size

475

break

476

delta_size += size

477

delta_parents = self._index.get_parents(parent)

478

else:

479

# We couldn't find a fulltext, so we must create a new one

480

return False

481

482

return fulltext_size > delta_size

483

484

def _add_raw_records(self, records, data):

485

"""Add all the records 'records' with data pre-joined in 'data'.

486

487

:param records: A list of tuples(version_id, options, parents, size).

488

:param data: The data for the records. When it is written, the records

489

are adjusted to have pos pointing into data by the sum of

490

the preceding records sizes.

491

"""

492

# write all the data

493

raw_record_sizes = [record[3] for record in records]

494

positions = self._data.add_raw_records(raw_record_sizes, data)

495

offset = 0

496

index_entries = []

497

for (version_id, options, parents, size), access_memo in zip(

498

records, positions):

499

index_entries.append((version_id, options, access_memo, parents))

500

if self._data._do_cache:

501

self._data._cache[version_id] = data[offset:offset+size]

502

offset += size

503

self._index.add_versions(index_entries)

504

505

def enable_cache(self):
    """Turn on caching in this knit's data object."""
    data = self._data
    data.enable_cache()

508

509

def clear_cache(self):
    """Clear the cache of this knit's data object; the index is untouched."""
    data = self._data
    data.clear_cache()

512

513

def copy_to(self, name, transport):
    """See VersionedFile.copy_to().

    The index is first copied under a temporary name, then the data file
    is copied, and only then is the index moved to its final name.
    """
    index_target = name + INDEX_SUFFIX
    staged_index = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(staged_index,
        self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staged_index, index_target)

527

528

def create_empty(self, name, transport, mode=None):
    """Create a new knit at name that shares this knit's factory and delta.

    :param mode: Accepted but not used.
    """
    settings = dict(factory=self.factory, delta=self.delta, create=True)
    return KnitVersionedFile(name, transport, **settings)

531

532

def _fix_parents(self, version_id, new_parents):

533

"""Fix the parents list for version.

534

535

This is done by appending a new version to the index

536

with identical data except for the parents list.

537

the parents list must be a superset of the current

538

list.

539

"""

540

current_values = self._index._cache[version_id]

541

assert set(current_values[4]).difference(set(new_parents)) == set()

542

self._index.add_version(version_id,

543

current_values[1],

544

(None, current_values[2], current_values[3]),

545

new_parents)

546

547

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    :raises RevisionNotPresent: If a requested version is not in this knit.
    """
    required_versions = set([osutils.safe_revision_id(v) for v in
        required_versions])
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index, in the order in which
    # self.versions() lists them.
    version_list = [version_id for version_id in self.versions()
                    if version_id in required_versions]

    # create the list of version information for the result
    copy_queue_records = []
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        _index_ref, _data_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    for (version_id, raw_data), record in \
            izip(self._data.read_records_iter_raw(copy_queue_records),
                 result_version_list):
        assert version_id == record[0], 'logic error, inconsistent results'
        raw_datum.append(raw_data)
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        """Read length bytes of record data, or all that is left for None."""
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

607

608

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored delta.

    :return: None unless version_id is stored as a 'line-delta'; otherwise
        the result of KnitContent.get_line_delta_blocks() for its delta.
    """
    stored_as = self._index.get_method(version_id)
    if stored_as == 'line-delta':
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)
    return None

613

614

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: A tuple (parent, sha1, noeol, delta). parent is the first
        parent of version_id, or None when it has no parents. When the
        version is stored as a fulltext the delta is computed against
        that parent's text (or against no lines at all); otherwise the
        stored line delta is parsed and returned.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    index_memo = self._index.get_position(version_id)
    records = self._data.read_records(((version_id, index_memo),))
    data, sha1 = records[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

    new_content = self.factory.parse_fulltext(data, version_id)
    if parent is None:
        old_texts = []
    else:
        reference_content = self._get_content(parent)
        old_texts = reference_content.text()
    new_texts = new_content.text()
    delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                     new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

640

641

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" when the factory is annotated, otherwise
        "knit-plain".
    """
    if self.factory.annotated:
        return "knit-%s" % ("annotated",)
    return "knit-%s" % ("plain",)

648

649

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    :return: A dict built from the items of the index's get_graph().
    """
    return dict(self._index.get_graph())

653

654

def get_sha1(self, version_id):
    """Return the first result of get_sha1s() called with just version_id."""
    digests = self.get_sha1s([version_id])
    return digests[0]

656

657

def get_sha1s(self, version_ids):
    """See VersionedFile.get_sha1().

    :return: The digests of version_ids, in the order given.
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(safe_ids)
    digests = []
    for safe_id in safe_ids:
        # record entry 2 is the 'digest'.
        digests.append(record_map[safe_id][2])
    return digests

663

664

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    :return: A new list: the data suffix followed by the index suffix.
    """
    suffixes = [DATA_SUFFIX]
    suffixes.append(INDEX_SUFFIX)
    return suffixes

668

669

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A version that is actually present is never a ghost; otherwise
    version_id is a ghost when some node in the index graph names it as a
    parent.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for _node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

683

684

def insert_data_stream(self, stream):
    """Insert knit records from a data stream into this knit.

    If a version in the stream is already present in this knit, it will not
    be inserted a second time. It will be checked for consistency with the
    stored version however, and may cause a KnitCorrupt error to be raised
    if the data in the stream disagrees with the already stored data.

    :param stream: A (format, data_list, reader_callable) tuple as returned
        by get_data_stream. data_list holds (version_id, options, length,
        parents) tuples; reader_callable(length) returns the next length
        bytes of record data.
    :raises KnitDataStreamIncompatible: If the stream's format signature
        differs from this knit's.
    :raises KnitCorrupt: If an already present version disagrees with the
        stream about its parents or its sha-1.
    :seealso: get_data_stream
    """
    # Unpacked here rather than in the signature: tuple parameters are not
    # valid in later Python versions, and the single positional argument
    # callers pass is unchanged.
    format_signature, data_list, reader_callable = stream
    if format_signature != self.get_format_signature():
        trace.mutter('incompatible format signature inserting to %r', self)
        raise KnitDataStreamIncompatible(
            format_signature, self.get_format_signature())

    for version_id, options, length, parents in data_list:
        if self.has_version(version_id):
            # First check: the list of parents.
            my_parents = self.get_parents_with_ghosts(version_id)
            if my_parents != parents:
                # XXX: KnitCorrupt is not quite the right exception here.
                raise KnitCorrupt(
                    self.filename,
                    'parents list %r from data stream does not match '
                    'already recorded parents %r for %s'
                    % (parents, my_parents, version_id))

            # Also check the SHA-1 of the fulltext this content will
            # produce. The record is always read so that the reader stays
            # positioned at the start of the next record.
            raw_data = reader_callable(length)
            my_fulltext_sha1 = self.get_sha1(version_id)
            df, rec = self._data._parse_record_header(version_id, raw_data)
            stream_fulltext_sha1 = rec[3]
            if my_fulltext_sha1 != stream_fulltext_sha1:
                # Actually, we don't know if it's this knit that's corrupt,
                # or the data stream we're trying to insert.
                raise KnitCorrupt(
                    self.filename, 'sha-1 does not match %s' % version_id)
        else:
            self._add_raw_records(
                [(version_id, options, parents, length)],
                reader_callable(length))

726

727

def versions(self):
    """See VersionedFile.versions.

    When the 'evil' debug flag is set, the call site is logged first.
    """
    evil_logging = 'evil' in debug.debug_flags
    if evil_logging:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

732

733

def has_version(self, version_id):
    """See VersionedFile.has_version.

    When the 'evil' debug flag is set, the call site is logged first.
    """
    evil_logging = 'evil' in debug.debug_flags
    if evil_logging:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    safe_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(safe_id)

739

740

__contains__ = has_version

741

742

def _merge_annotations(self, content, parents, parent_texts={},

743

delta=None, annotated=None,

744

left_matching_blocks=None):

745

"""Merge annotations for content. This is done by comparing

746

the annotations based on changed to the text.

747

"""

748

if left_matching_blocks is not None:

749

delta_seq = diff._PrematchedMatcher(left_matching_blocks)

750

else:

751

delta_seq = None

752

if annotated:

753

for parent_id in parents:

754

merge_content = self._get_content(parent_id, parent_texts)

755

if (parent_id == parents[0] and delta_seq is not None):

756

seq = delta_seq

757

else:

758

seq = patiencediff.PatienceSequenceMatcher(

759

None, merge_content.text(), content.text())

760

for i, j, n in seq.get_matching_blocks():

761

if n == 0:

762

continue

763

# this appears to copy (origin, text) pairs across to the

764

# new content for any line that matches the last-checked

765

# parent.

766

content._lines[j:j+n] = merge_content._lines[i:i+n]

767

if delta:

768

if delta_seq is None:

769

reference_content = self._get_content(parents[0], parent_texts)

770

new_texts = content.text()

771

old_texts = reference_content.text()

772

delta_seq = patiencediff.PatienceSequenceMatcher(

773

None, old_texts, new_texts)

774

return self._make_line_delta(delta_seq, content)

775

776

def _make_line_delta(self, delta_seq, new_content):

777

"""Generate a line delta from delta_seq and new_content."""

778

diff_hunks = []

779

for op in delta_seq.get_opcodes():

780

if op[0] == 'equal':

781

continue

782

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

783

return diff_hunks

784

785

def _get_components_positions(self, version_ids):

786

"""Produce a map of position data for the components of versions.

787

788

This data is intended to be used for retrieving the knit records.

789

790

A dict of version_id to (method, data_pos, data_size, next) is

791

returned.

792

method is the way referenced data should be applied.

793

data_pos is the position of the data in the knit.

794

data_size is the size of the data in the knit.

795

next is the build-parent of the version, or None for fulltexts.

796

"""

797

component_data = {}

798

for version_id in version_ids:

799

cursor = version_id

800

801

while cursor is not None and cursor not in component_data:

802

method = self._index.get_method(cursor)

803

if method == 'fulltext':

804

next = None

805

else:

806

next = self.get_parents(cursor)[0]

807

index_memo = self._index.get_position(cursor)

808

component_data[cursor] = (method, index_memo, next)

809

cursor = next

810

return component_data

811

812

def _get_content(self, version_id, parent_texts={}):

813

"""Returns a content object that makes up the specified

814

version."""

815

cached_version = parent_texts.get(version_id, None)

816

if cached_version is not None:

817

if not self.has_version(version_id):

818

raise RevisionNotPresent(version_id, self.filename)

819

return cached_version

820

821

text_map, contents_map = self._get_content_maps([version_id])

822

return contents_map[version_id]

823

824

def _check_versions_present(self, version_ids):

825

"""Check that all specified versions are present."""

826

self._index.check_versions_present(version_ids)

827

828

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,

829

nostore_sha, random_id, check_content):

830

"""See VersionedFile.add_lines_with_ghosts()."""

831

self._check_add(version_id, lines, random_id, check_content)

832

return self._add(version_id, lines, parents, self.delta,

833

parent_texts, None, nostore_sha)

834

835

def _add_lines(self, version_id, parents, lines, parent_texts,

836

left_matching_blocks, nostore_sha, random_id, check_content):

837

"""See VersionedFile.add_lines."""

838

self._check_add(version_id, lines, random_id, check_content)

839

self._check_versions_present(parents)

840

return self._add(version_id, lines[:], parents, self.delta,

841

parent_texts, left_matching_blocks, nostore_sha)

842

843

def _check_add(self, version_id, lines, random_id, check_content):
    """check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If random_id is false and version_id
        is already present.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id:
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

857

858

def _add(self, version_id, lines, parents, delta, parent_texts,
    left_matching_blocks, nostore_sha):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param parent_texts: An optional dict of version id to content object
        used when reading the parents; None is treated as empty.
    :param left_matching_blocks: Optional precomputed matching blocks
        against the first parent, passed on to _merge_annotations().
    :param nostore_sha: If equal to the sha of lines, nothing is stored
        and errors.ExistingContent is raised.
    :return: A tuple (digest, text_length, content).
    """
    #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    #  +400   0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    #  +461   0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    #  +461   0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    #  +461   0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    #  +461   0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383   0      8.0370      8.0370   +<len>
    #  +61    0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    #  +61    0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    #  +61    0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    #  +61    0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    present_parents = []
    if parent_texts is None:
        parent_texts = {}
    for parent in parents:
        if self.has_version(parent):
            present_parents.append(parent)

    # can only compress against the left most present parent.
    if (delta and
        (len(present_parents) == 0 or
         present_parents[0] != parents[0])):
        delta = False

    # The digest and length describe the text exactly as it was given,
    # before any newline is appended below.
    digest = sha_strings(lines)
    if nostore_sha == digest:
        raise errors.ExistingContent
    text_length = sum(map(len, lines))
    options = []
    # endswith() rather than lines[-1][-1] so that an empty final line does
    # not raise IndexError.
    if lines and not lines[-1].endswith('\n'):
        # copy the contents of lines.
        lines = lines[:]
        options.append('no-eol')
        lines[-1] = lines[-1] + '\n'

    if delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    content = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if needed.
        delta_hunks = self._merge_annotations(content, present_parents,
            parent_texts, delta, self.factory.annotated,
            left_matching_blocks)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(content)

    access_memo = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, access_memo, parents)
    return digest, text_length, content

928

929

def check(self, progress_bar=None):
    """See VersionedFile.check().

    Nothing is checked here; progress_bar is ignored and None is returned.
    """
    return None

931

932

def _clone_text(self, new_version_id, old_version_id, parents):

933

"""See VersionedFile.clone_text()."""

934

# FIXME RBC 20060228 make fast by only inserting an index with null

935

# delta.

936

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

937

938

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    :return: The first result of get_line_list() called with just
        version_id.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

941

942

def _get_record_map(self, version_ids):

943

"""Produce a dictionary of knit records.

944

945

The keys are version_ids, the values are tuples of (method, content,

946

digest, next).

947

method is the way the content should be applied.

948

content is a KnitContent object.

949

digest is the SHA1 digest of this version id after all steps are done

950

next is the build-parent of the version, i.e. the leftmost ancestor.

951

If the method is fulltext, next will be None.

952

"""

953

position_map = self._get_components_positions(version_ids)

954

# c = component_id, m = method, i_m = index_memo, n = next

955

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

956

record_map = {}

957

for component_id, content, digest in \

958

self._data.read_records_iter(records):

959

method, index_memo, next = position_map[component_id]

960

record_map[component_id] = method, content, digest, next

961

962

return record_map

963

964

def get_text(self, version_id):
    """See VersionedFile.get_text

    :return: The first result of get_texts() called with just version_id.
    """
    texts = self.get_texts([version_id])
    return texts[0]

967

968

def get_texts(self, version_ids):
    """Return each requested version as one string, its lines joined."""
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

970

971

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    :return: One entry per requested version, in the order given, each
        taken from the text map of _get_content_maps().
    """
    safe_ids = [osutils.safe_revision_id(v) for v in version_ids]
    for safe_id in safe_ids:
        self.check_not_reserved_id(safe_id)
    text_map, _content_map = self._get_content_maps(safe_ids)
    return [text_map[safe_id] for safe_id in safe_ids]

978

979

_get_lf_split_line_list = get_line_list

980

981

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :param version_ids: The versions to reconstruct.
    :return: (text_map, final_content) where text_map contains the texts
        for the requested versions and final_content contains the
        content objects for them (after any 'no-eol' stripping).
        Both dicts take version_ids as their keys.
    :raises KnitCorrupt: if the sha-1 of a reconstructed text does not
        match the digest recorded for it.
    """
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Content built for every component seen so far, keyed by component id,
    # so build chains shared between requested versions are applied once.
    content_map = {}
    final_content = {}
    for version_id in version_ids:
        # Walk from the requested version towards its basis, stopping at
        # the first component whose content has already been built.
        components = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            components.append((cursor, method, data, digest))
            if cursor in content_map:
                break
            cursor = build_parent

        # Replay the chain basis-first.
        content = None
        for component_id, method, data, digest in reversed(components):
            if component_id in content_map:
                content = content_map[component_id]
            else:
                # NOTE(review): the requested version_id, not component_id,
                # is what gets handed to the factory parsers - confirm this
                # is intended.
                if method == 'fulltext':
                    assert content is None
                    content = self.factory.parse_fulltext(data, version_id)
                elif method == 'line-delta':
                    delta = self.factory.parse_line_delta(data, version_id)
                    # copy first so the cached parent content is not
                    # modified by applying this delta.
                    content = content.copy()
                    content._lines = self._apply_delta(content._lines,
                                                       delta)
                content_map[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # strip on a copy: the object stored in content_map may be the
            # basis of other versions.
            content = content.copy()
            content.strip_last_line_newline()
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

1031

1032

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: The versions to walk, or None for everything
        returned by self.versions(). Explicit ids are passed through
        osutils.safe_revision_id().
    :param pb: A progress bar to update; a progress.DummyProgress is
        created when None.
    :return: An iterator over every line yielded by the factory for the
        stored record (fulltext or line-delta) of each requested version.
    :raises RevisionNotPresent: if a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need version_id, position, length
    version_id_records = []
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue:
    for version_id in self.versions():
        if version_id in requested_versions:
            index_memo = self._index.get_position(version_id)
            version_id_records.append((version_id, index_memo))

    total = len(version_id_records)
    for version_idx, (version_id, data, sha_value) in \
            enumerate(self._data.read_records_iter(version_id_records)):
        pb.update('Walking content.', version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            line_iterator = self.factory.get_fulltext_content(data)
        else:
            line_iterator = self.factory.get_linedelta_content(data)
        for line in line_iterator:
            yield line

    pb.update('Walking content.', total, total)

1071

1072

def iter_parents(self, version_ids):
    """Iterate through the parents for many version ids.

    :param version_ids: An iterable yielding version_ids. Each id is
        passed through osutils.safe_revision_id() before the index is
        queried.
    :return: An iterator that yields (version_id, parents). Requested
        version_ids not present in the versioned file are simply skipped.
        The order is undefined, allowing for different optimisations in
        the underlying implementation.
    """
    version_ids = [osutils.safe_revision_id(version_id) for
                   version_id in version_ids]
    return self._index.iter_parents(version_ids)

1084

1085

def num_versions(self):
    """See VersionedFile.num_versions().

    :return: Whatever self._index.num_versions() reports.
    """
    return self._index.num_versions()

__len__ = num_versions

1090

1091

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    :param version_id: The version to annotate; passed through
        osutils.safe_revision_id() first.
    :return: The result of self.factory.annotate_iter() for this knit and
        the normalised version id.
    """
    version_id = osutils.safe_revision_id(version_id)
    return self.factory.annotate_iter(self, version_id)

1095

1096

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :param version_id: The version to query; passed through
        osutils.safe_revision_id() first.
    :return: The parents reported by self._index.get_parents().
    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    version_id = osutils.safe_revision_id(version_id)
    try:
        return self._index.get_parents(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)

1106

1107

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :param version_id: The version to query; passed through
        osutils.safe_revision_id() first.
    :return: The parents reported by
        self._index.get_parents_with_ghosts().
    :raises RevisionNotPresent: if the index lookup raises KeyError.
    """
    version_id = osutils.safe_revision_id(version_id)
    try:
        return self._index.get_parents_with_ghosts(version_id)
    except KeyError:
        raise RevisionNotPresent(version_id, self.filename)

1114

1115

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id (any basestring) or an iterable
        of version ids. A falsy value yields an empty result.
    :param topo_sorted: Passed through to self._index.get_ancestry().
    :return: [] for an empty request, otherwise the index's answer for the
        ids after osutils.safe_revision_id() normalisation.
    """
    if isinstance(versions, basestring):
        # a lone id is accepted as shorthand for a one-element list
        versions = [versions]
    if not versions:
        return []
    versions = [osutils.safe_revision_id(v) for v in versions]
    return self._index.get_ancestry(versions, topo_sorted)

1123

1124

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id (any basestring) or an iterable
        of version ids. A falsy value yields an empty result.
    :return: [] for an empty request, otherwise the answer of
        self._index.get_ancestry_with_ghosts() for the ids after
        osutils.safe_revision_id() normalisation.
    """
    if isinstance(versions, basestring):
        # a lone id is accepted as shorthand for a one-element list
        versions = [versions]
    if not versions:
        return []
    versions = [osutils.safe_revision_id(v) for v in versions]
    return self._index.get_ancestry_with_ghosts(versions)

1132

1133

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    :param ver_a: One version to merge; normalised with
        osutils.safe_revision_id().
    :param ver_b: The other version to merge; normalised the same way.
    :return: The result of merge._plan_annotate_merge() given the
        annotations and the (unsorted) ancestry sets of both versions.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    # ordering is irrelevant for set membership, so skip the topo sort
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))

    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    return merge._plan_annotate_merge(annotated_a, annotated_b,
                                      ancestors_a, ancestors_b)

1144

1145

1146

class _KnitComponentFile(object):

1147

"""One of the files used to implement a knit database"""

1148

1149

def __init__(self, transport, filename, mode, file_mode=None,

1150

create_parent_dir=False, dir_mode=None):

1151

self._transport = transport

1152

self._filename = filename

1153

self._mode = mode

1154

self._file_mode = file_mode

1155

self._dir_mode = dir_mode

1156

self._create_parent_dir = create_parent_dir

1157

self._need_to_create = False

1158

1159

def _full_path(self):

1160

"""Return the full path to this file."""

1161

return self._transport.base + self._filename

1162

1163

def check_header(self, fp):

1164

line = fp.readline()

1165

if line == '':

1166

# An empty file can actually be treated as though the file doesn't

1167

# exist yet.

1168

raise errors.NoSuchFile(self._full_path())

1169

if line != self.HEADER:

1170

raise KnitHeaderError(badline=line,

1171

filename=self._transport.abspath(self._filename))

1172

1173

def __repr__(self):

1174

return '%s(%s)' % (self.__class__.__name__, self._filename)

1175

1176

1177

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to a Index
    object.

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write is interrupted to the index file, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result its normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
         indexes).

        The cached entry is the tuple
        (version_id, options, pos, size, parents, index).
        """
        # only want the _history index to reference the 1st index entry
        # for version_id
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
        else:
            # a repeated entry keeps the sequence number of the first one
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,
                                   options,
                                   pos,
                                   size,
                                   parents,
                                   index)

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        """Open the index, loading it into memory or arranging creation.

        :param transport: The transport the index file lives on.
        :param filename: The name of the index file.
        :param mode: Access mode; creation is only considered for 'w'.
        :param create: If True (and mode is 'w') a missing index is
            created rather than reported.
        :param file_mode: Permissions passed on when the file is written.
        :param create_parent_dir: Stored for use when the file is created.
        :param delay_create: If True, a missing index is not written now;
            self._need_to_create is set instead.
        :param dir_mode: Stored for use when the file is created.
        :raises NoSuchFile: if the index is missing and may not be created.
        """
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
                                    dir_mode=dir_mode)
        self._cache = {}
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        # in the knit.
        self._history = []
        try:
            fp = self._transport.get(self._filename)
            try:
                # _load_data may raise NoSuchFile if the target knit is
                # completely empty.
                _load_data(self, fp)
            finally:
                fp.close()
        except NoSuchFile:
            if mode != 'w' or not create:
                raise
            elif delay_create:
                self._need_to_create = True
            else:
                self._transport.put_bytes_non_atomic(
                    self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Parents that are not themselves in the index (ghosts) are left out.

        :raises RevisionNotPresent: if a requested version is not cached.
        """
        # get a graph of all the mentioned versions:
        graph = {}
        pending = set(versions)
        cache = self._cache
        while pending:
            version = pending.pop()
            # trim ghosts
            try:
                parents = [p for p in cache[version][4] if p in cache]
            except KeyError:
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        if not topo_sorted:
            return graph.keys()
        return topo_sort(graph.items())

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Versions reached through the graph but absent from the index are
        included in the result as parentless nodes.
        """
        # get a graph of all the mentioned versions:
        self.check_versions_present(versions)
        cache = self._cache
        graph = {}
        pending = set(versions)
        while pending:
            version = pending.pop()
            try:
                parents = cache[version][4]
            except KeyError:
                # ghost, fake it
                graph[version] = []
            else:
                # if not completed
                pending.update([p for p in parents if p not in graph])
                graph[version] = parents
        return topo_sort(graph.items())

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        for version_id in version_ids:
            # Only the lookup is guarded: a KeyError raised by the consumer
            # at the yield point must not be swallowed here.
            try:
                parents = tuple(self.get_parents(version_id))
            except KeyError:
                continue
            yield version_id, parents

    def num_versions(self):
        """Return the number of distinct version ids in the index."""
        return len(self._history)

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return self._history

    def _version_list_to_index(self, versions):
        """Encode a list of parent versions for writing to an index line.

        Versions already in the index are written as their sequence
        number; all others are written as '.' followed by the version id.

        :return: The encoded references joined by single spaces.
        """
        result_list = []
        cache = self._cache
        for version in versions:
            if version in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[version][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + version)
        return ' '.join(result_list)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, index_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
                         (version_id, options, (index, pos, size), parents).
        """
        lines = []
        # snapshots so the in-memory state can be rolled back on failure
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        try:
            for version_id, options, (index, pos, size), parents in versions:
                line = "\n%s %s %s %s %s :" % (version_id,
                                               ','.join(options),
                                               pos,
                                               size,
                                               self._version_list_to_index(parents))
                assert isinstance(line, str), \
                    'content must be utf-8 encoded: %r' % (line,)
                lines.append(line)
                self._cache_version(version_id, options, pos, size, parents)
            if not self._need_to_create:
                self._transport.append_bytes(self._filename, ''.join(lines))
            else:
                # creation was deferred: write header and records together
                sio = StringIO()
                sio.write(self.HEADER)
                sio.writelines(lines)
                sio.seek(0)
                self._transport.put_file_non_atomic(self._filename, sio,
                                    create_parent_dir=self._create_parent_dir,
                                    mode=self._file_mode,
                                    dir_mode=self._dir_mode)
                self._need_to_create = False
        except:
            # If any problems happen, restore the original values and re-raise
            # (deliberately bare: the rollback must run for every failure,
            # and the exception is always propagated).
            self._history = orig_history
            self._cache = orig_cache
            raise

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        index field.

        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        return None, entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version.

        :return: 'fulltext' or 'line-delta'.
        :raises RevisionNotPresent: if the version is not in the index.
        :raises errors.KnitIndexUnknownMethod: if the options name neither
            method.
        """
        try:
            options = self._cache[version_id][1]
        except KeyError:
            raise RevisionNotPresent(version_id, self._filename)
        if 'fulltext' in options:
            return 'fulltext'
        else:
            if 'line-delta' not in options:
                raise errors.KnitIndexUnknownMethod(self._full_path(), options)
            return 'line-delta'

    def get_options(self, version_id):
        """Return the options cached for the version.

        The value is returned exactly as it was stored in the cache entry
        (presumably a sequence of flags such as 'fulltext' or 'no-eol' -
        verify against the index loader).
        """
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: for the first missing version found.
        """
        cache = self._cache
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

1459

1460

1461

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Index keys are 1-tuples (version_id,). Nodes are handled as
    (index, key, value, node_refs) tuples where value is a one character
    flag ('N' for no-eol, otherwise ' ') followed by "pos size", and
    node_refs[0] holds the parent keys. When deltas are enabled,
    node_refs[1] holds the compression parent (empty for a fulltext).
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas are requested without parents.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent once the
            iteration is exhausted if any requested key was not found.
        :return: A generator of (index, key, value, node_refs) nodes; for a
            parentless index node_refs is always ().
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given keys that exist in the index."""
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        :raises RevisionNotPresent: if a requested version is not in the
            index.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            # iterating the dict visits its keys
            result_keys = graph
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        :raises RevisionNotPresent: if a requested version is not in the
            index.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # only parents not already known to be present need a lookup
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        # count lazily instead of building a list of every node
        return sum(1 for node in self._graph_index.iter_all_entries())

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Unwrap index keys (1-tuples) into a tuple of version ids."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        # skip the leading eol flag character, then parse "pos size"
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Map a compression-parent reference list to a method name."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the index node for version_id.

        :raises RevisionNotPresent: if the index has no such entry.
        """
        try:
            return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]
        except IndexError:
            raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return the list of options for the version.

        The list starts with the compression method ('fulltext' or
        'line-delta') and has 'no-eol' appended when the node value is
        flagged with 'N'.
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
                         (version_id, options, access_memo, parents)
                         where access_memo is (index, pos, size).
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: if a record is incompatible with this index's
            settings, or conflicts with an entry already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # the first parent is recorded as compression parent
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        # Entries already in the index must match exactly; matching ones
        # are dropped so that only new nodes reach the callback.
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        if self._parents:
            result = [(key, value, node_refs)
                      for key, (value, node_refs) in keys.items()]
        else:
            result = [(key, value)
                      for key, (value, node_refs) in keys.items()]
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Wrap version ids into a set of index keys (1-tuples)."""
        return set((version_id, ) for version_id in version_ids)

1776

1777

1778

class _KnitAccess(object):

1779

"""Access to knit records in a .knit file."""

1780

1781

def __init__(self, transport, filename, _file_mode, _dir_mode,

1782

_need_to_create, _create_parent_dir):

1783

"""Create a _KnitAccess for accessing and inserting data.

1784

1785

:param transport: The transport the .knit is located on.

1786

:param filename: The filename of the .knit.

1787

"""

1788

self._transport = transport

1789

self._filename = filename

1790

self._file_mode = _file_mode

1791

self._dir_mode = _dir_mode

1792

self._need_to_create = _need_to_create

1793

self._create_parent_dir = _create_parent_dir

1794

1795

def add_raw_records(self, sizes, raw_data):

1796

"""Add raw knit bytes to a storage area.

1797

1798

The data is spooled to whereever the access method is storing data.

1799

1800

:param sizes: An iterable containing the size of each raw data segment.

1801

:param raw_data: A bytestring containing the data.

1802

:return: A list of memos to retrieve the record later. Each memo is a

1803

tuple - (index, pos, length), where the index field is always None

1804

for the .knit access method.

1805

"""

1806

assert type(raw_data) == str, \

1807

'data must be plain bytes was %s' % type(raw_data)

1808

if not self._need_to_create:

1809

base = self._transport.append_bytes(self._filename, raw_data)

1810

else:

1811

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1812

create_parent_dir=self._create_parent_dir,

1813

mode=self._file_mode,

1814

dir_mode=self._dir_mode)

1815

self._need_to_create = False

1816

base = 0

1817

result = []

1818

for size in sizes:

1819

result.append((None, base, size))

1820

base += size

1821

return result

1822

1823

def create(self):

1824

"""IFF this data access has its own storage area, initialise it.

1825

1826

:return: None.

1827

"""

1828

self._transport.put_bytes_non_atomic(self._filename, '',

1829

mode=self._file_mode)

1830

1831

def open_file(self):

1832

"""IFF this data access can be represented as a single file, open it.

1833

1834

For knits that are not mapped to a single file on disk this will

1835

always return None.

1836

1837

:return: None or a file handle.

1838

"""

1839

try:

1840

return self._transport.get(self._filename)

1841

except NoSuchFile:

1842

pass

1843

return None

1844

1845

def get_raw_records(self, memos_for_retrieval):

1846

"""Get the raw bytes for a records.

1847

1848

:param memos_for_retrieval: An iterable containing the (index, pos,

1849

length) memo for retrieving the bytes. The .knit method ignores

1850

the index as there is always only a single file.

1851

:return: An iterator over the bytes of the records.

1852

"""

1853

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1854

for pos, data in self._transport.readv(self._filename, read_vector):

1855

yield data

1856

1857

1858

class _PackAccess(object):

1859

"""Access to knit records via a collection of packs."""

1860

1861

def __init__(self, index_to_packs, writer=None):

1862

"""Create a _PackAccess object.

1863

1864

:param index_to_packs: A dict mapping index objects to the transport

1865

and file names for obtaining data.

1866

:param writer: A tuple (pack.ContainerWriter, write_index) which

1867

contains the pack to write, and the index that reads from it will

1868

be associated with.

1869

"""

1870

if writer:

1871

self.container_writer = writer[0]

1872

self.write_index = writer[1]

1873

else:

1874

self.container_writer = None

1875

self.write_index = None

1876

self.indices = index_to_packs

1877

1878

def add_raw_records(self, sizes, raw_data):

1879

"""Add raw knit bytes to a storage area.

1880

1881

The data is spooled to the container writer in one bytes-record per

1882

raw data item.

1883

1884

:param sizes: An iterable containing the size of each raw data segment.

1885

:param raw_data: A bytestring containing the data.

1886

:return: A list of memos to retrieve the record later. Each memo is a

1887

tuple - (index, pos, length), where the index field is the

1888

write_index object supplied to the PackAccess object.

1889

"""

1890

assert type(raw_data) == str, \

1891

'data must be plain bytes was %s' % type(raw_data)

1892

result = []

1893

offset = 0

1894

for size in sizes:

1895

p_offset, p_length = self.container_writer.add_bytes_record(

1896

raw_data[offset:offset+size], [])

1897

offset += size

1898

result.append((self.write_index, p_offset, p_length))

1899

return result

1900

1901

def create(self):

1902

"""Pack based knits do not get individually created."""

1903

1904

def get_raw_records(self, memos_for_retrieval):

1905

"""Get the raw bytes for a records.

1906

1907

:param memos_for_retrieval: An iterable containing the (index, pos,

1908

length) memo for retrieving the bytes. The Pack access method

1909

looks up the pack to use for a given record in its index_to_pack

1910

map.

1911

:return: An iterator over the bytes of the records.

1912

"""

1913

# first pass, group into same-index requests

1914

request_lists = []

1915

current_index = None

1916

for (index, offset, length) in memos_for_retrieval:

1917

if current_index == index:

1918

current_list.append((offset, length))

1919

else:

1920

if current_index is not None:

1921

request_lists.append((current_index, current_list))

1922

current_index = index

1923

current_list = [(offset, length)]

1924

# handle the last entry

1925

if current_index is not None:

1926

request_lists.append((current_index, current_list))

1927

for index, offsets in request_lists:

1928

transport, path = self.indices[index]

1929

reader = pack.make_readv_reader(transport, path, offsets)

1930

for names, read_func in reader.iter_records():

1931

yield read_func(None)

1932

1933

def open_file(self):

1934

"""Pack based knits have no single file."""

1935

return None

1936

1937

def set_writer(self, writer, index, (transport, packname)):

1938

"""Set a writer to use for adding data."""

1939

self.indices[index] = (transport, packname)

1940

self.container_writer = writer

1941

self.write_index = index

1942

1943

1944

class _KnitData(object):

1945

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1946

1947

The KnitData class provides the logic for parsing and using knit records,

1948

making use of an access method for the low level read and write operations.

1949

"""

1950

1951

def __init__(self, access):

1952

"""Create a KnitData object.

1953

1954

:param access: The access method to use. Access methods such as

1955

_KnitAccess manage the insertion of raw records and the subsequent

1956

retrieval of the same.

1957

"""

1958

self._access = access

1959

self._checked = False

1960

# TODO: jam 20060713 conceptually, this could spill to disk

1961

# if the cached size gets larger than a certain amount

1962

# but it complicates the model a bit, so for now just use

1963

# a simple dictionary

1964

self._cache = {}

1965

self._do_cache = False

1966

1967

def enable_cache(self):

1968

"""Enable caching of reads."""

1969

self._do_cache = True

1970

1971

def clear_cache(self):

1972

"""Clear the record cache."""

1973

self._do_cache = False

1974

self._cache = {}

1975

1976

def _open_file(self):

1977

return self._access.open_file()

1978

1979

def _record_to_data(self, version_id, digest, lines):

1980

"""Convert version_id, digest, lines into a raw data block.

1981

1982

:return: (len, a StringIO instance with the raw data ready to read.)

1983

"""

1984

bytes = (''.join(chain(

1985

["version %s %d %s\n" % (version_id,

1986

len(lines),

1987

digest)],

1988

lines,

1989

["end %s\n" % version_id])))

1990

assert bytes.__class__ == str

1991

compressed_bytes = bytes_to_gzip(bytes)

1992

return len(compressed_bytes), compressed_bytes

1993

1994

def add_raw_records(self, sizes, raw_data):

1995

"""Append a prepared record to the data file.

1996

1997

:param sizes: An iterable containing the size of each raw data segment.

1998

:param raw_data: A bytestring containing the data.

1999

:return: a list of index data for the way the data was stored.

2000

See the access method add_raw_records documentation for more

2001

details.

2002

"""

2003

return self._access.add_raw_records(sizes, raw_data)

2004

2005

def add_record(self, version_id, digest, lines):

2006

"""Write new text record to disk.

2007

2008

Returns index data for retrieving it later, as per add_raw_records.

2009

"""

2010

size, bytes = self._record_to_data(version_id, digest, lines)

2011

result = self.add_raw_records([size], bytes)

2012

if self._do_cache:

2013

self._cache[version_id] = bytes

2014

return result[0]

2015

2016

def _parse_record_header(self, version_id, raw_data):

2017

"""Parse a record header for consistency.

2018

2019

:return: the header and the decompressor stream.

2020

as (stream, header_record)

2021

"""

2022

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

2023

try:

2024

rec = self._check_header(version_id, df.readline())

2025

except Exception, e:

2026

raise KnitCorrupt(self._access,

2027

"While reading {%s} got %s(%s)"

2028

% (version_id, e.__class__.__name__, str(e)))

2029

return df, rec

2030

2031

def _check_header(self, version_id, line):

2032

rec = line.split()

2033

if len(rec) != 4:

2034

raise KnitCorrupt(self._access,

2035

'unexpected number of elements in record header')

2036

if rec[1] != version_id:

2037

raise KnitCorrupt(self._access,

2038

'unexpected version, wanted %r, got %r'

2039

% (version_id, rec[1]))

2040

return rec

2041

2042

def _parse_record(self, version_id, data):

2043

# profiling notes:

2044

# 4168 calls in 2880 217 internal

2045

# 4168 calls to _parse_record_header in 2121

2046

# 4168 calls to readlines in 330

2047

df = GzipFile(mode='rb', fileobj=StringIO(data))

2048

2049

try:

2050

record_contents = df.readlines()

2051

except Exception, e:

2052

raise KnitCorrupt(self._access,

2053

"While reading {%s} got %s(%s)"

2054

% (version_id, e.__class__.__name__, str(e)))

2055

header = record_contents.pop(0)

2056

rec = self._check_header(version_id, header)

2057

2058

last_line = record_contents.pop()

2059

if len(record_contents) != int(rec[2]):

2060

raise KnitCorrupt(self._access,

2061

'incorrect number of lines %s != %s'

2062

' for version {%s}'

2063

% (len(record_contents), int(rec[2]),

2064

version_id))

2065

if last_line != 'end %s\n' % rec[1]:

2066

raise KnitCorrupt(self._access,

2067

'unexpected version end line %r, wanted %r'

2068

% (last_line, version_id))

2069

df.close()

2070

return record_contents, rec[3]

2071

2072

def read_records_iter_raw(self, records):

2073

"""Read text records from data file and yield raw data.

2074

2075

This unpacks enough of the text record to validate the id is

2076

as expected but thats all.

2077

"""

2078

# setup an iterator of the external records:

2079

# uses readv so nice and fast we hope.

2080

if len(records):

2081

# grab the disk data needed.

2082

if self._cache:

2083

# Don't check _cache if it is empty

2084

needed_offsets = [index_memo for version_id, index_memo

2085

in records

2086

if version_id not in self._cache]

2087

else:

2088

needed_offsets = [index_memo for version_id, index_memo

2089

in records]

2090

2091

raw_records = self._access.get_raw_records(needed_offsets)

2092

2093

for version_id, index_memo in records:

2094

if version_id in self._cache:

2095

# This data has already been validated

2096

data = self._cache[version_id]

2097

else:

2098

data = raw_records.next()

2099

if self._do_cache:

2100

self._cache[version_id] = data

2101

2102

# validate the header

2103

df, rec = self._parse_record_header(version_id, data)

2104

df.close()

2105

yield version_id, data

2106

2107

def read_records_iter(self, records):

2108

"""Read text records from data file and yield result.

2109

2110

The result will be returned in whatever is the fastest to read.

2111

Not by the order requested. Also, multiple requests for the same

2112

record will only yield 1 response.

2113

:param records: A list of (version_id, pos, len) entries

2114

:return: Yields (version_id, contents, digest) in the order

2115

read, not the order requested

2116

"""

2117

if not records:

2118

return

2119

2120

if self._cache:

2121

# Skip records we have alread seen

2122

yielded_records = set()

2123

needed_records = set()

2124

for record in records:

2125

if record[0] in self._cache:

2126

if record[0] in yielded_records:

2127

continue

2128

yielded_records.add(record[0])

2129

data = self._cache[record[0]]

2130

content, digest = self._parse_record(record[0], data)

2131

yield (record[0], content, digest)

2132

else:

2133

needed_records.add(record)

2134

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2135

else:

2136

needed_records = sorted(set(records), key=operator.itemgetter(1))

2137

2138

if not needed_records:

2139

return

2140

2141

# The transport optimizes the fetching as well

2142

# (ie, reads continuous ranges.)

2143

raw_data = self._access.get_raw_records(

2144

[index_memo for version_id, index_memo in needed_records])

2145

2146

for (version_id, index_memo), data in \

2147

izip(iter(needed_records), raw_data):

2148

content, digest = self._parse_record(version_id, data)

2149

if self._do_cache:

2150

self._cache[version_id] = data

2151

yield version_id, content, digest

2152

2153

def read_records(self, records):

2154

"""Read records into a dictionary."""

2155

components = {}

2156

for record_id, content, digest in \

2157

self.read_records_iter(records):

2158

components[record_id] = (content, digest)

2159

return components

2160

2161

2162

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when both source and target are KnitVersionedFiles."""
        try:
            if not isinstance(source, KnitVersionedFile):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw records the target lacks from the source, then fixes
        up the parents of versions whose parent lists differ.

        :return: The number of records copied.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # Note: the pb argument is replaced by a nested progress bar.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents_with_ghosts(version))
                if target_parents == source_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-side parents.
                for parent in source_parents:
                    if parent not in target_parents:
                        parent_ancestors = self.source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                extra_parents = source_parents.difference(target_parents)
                needed_versions.update(extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if self.target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            scheduled = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in scheduled or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                scheduled.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_chunks = []
            raw_records = []
            record_stream = self.source._data.read_records_iter_raw(
                copy_queue_records)
            for fetched, planned in izip(record_stream, copy_queue):
                version_id, raw_data = fetched
                version_id2, options, parents = planned
                assert version_id == version_id2, 'logic error, inconsistent results'
                count += 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_chunks.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_chunks))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                additions = list(source_parents.difference(target_parents))
                new_parents = self.target.get_parents_with_ghosts(version) + additions
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2274

2275

2276

# Hand the InterKnit class to InterVersionedFile.register_optimiser.
InterVersionedFile.register_optimiser(InterKnit)

2277

2278

2279

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Return True when source is a Weave and target a KnitVersionedFile."""
        try:
            if not isinstance(source, bzrlib.weave.Weave):
                return False
            return isinstance(target, KnitVersionedFile)
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the versions the target lacks by re-adding their lines, then
        fixes up the parents of versions whose parent lists differ.

        :return: The number of versions added.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        if not version_ids:
            return 0

        # Note: the pb argument is replaced by a nested progress bar.
        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            target_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - target_versions
            shared_versions = self.source_ancestry.intersection(target_versions)
            mismatched_versions = set()
            for version in shared_versions:
                # scan to include needed parents.
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents(version))
                # if all of the source parents are in the target, it is fine.
                extra_parents = source_parents.difference(target_parents)
                if not extra_parents:
                    continue
                # FIXME TEST this check for cycles being introduced works
                # the logic is we have a cycle if in our graph we are an
                # ancestor of any of the source-side parents.
                for parent in source_parents:
                    if parent not in target_parents:
                        parent_ancestors = self.source.get_ancestry(parent)
                        if version in parent_ancestors:
                            raise errors.GraphCycleError([parent, version])
                # ensure this parent will be available later.
                needed_versions.update(extra_parents.difference(target_versions))
                mismatched_versions.add(version)

            if not (needed_versions or mismatched_versions):
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = []
            for candidate in full_list:
                if self.target.has_version(candidate):
                    continue
                if candidate in needed_versions:
                    version_list.append(candidate)

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                lines = self.source.get_lines(version_id)
                self.target.add_lines(version_id, parents, lines)
                count += 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                target_parents = set(self.target.get_parents_with_ghosts(version))
                source_parents = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                additions = list(source_parents.difference(target_parents))
                new_parents = self.target.get_parents_with_ghosts(version) + additions
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

2367

2368

2369

# Hand the WeaveToKnit class to InterVersionedFile.register_optimiser.
InterVersionedFile.register_optimiser(WeaveToKnit)

2370

2371

2372

# Deprecated, use PatienceSequenceMatcher instead

2373

# KnitSequenceMatcher is only another name for
# patiencediff.PatienceSequenceMatcher; both refer to the same class.
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2374

2375

2376

def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.

    Each version in the ancestry of revision_id is annotated in turn from
    its parents' annotations.

    :return: An iterator over the annotation of revision_id.
    """
    ancestry = knit.get_ancestry(revision_id)
    line_lists = knit.get_line_list(ancestry)
    fulltext = dict(zip(ancestry, line_lists))
    annotations = {}
    for candidate in ancestry:
        if candidate in annotations:
            continue
        parents = knit.get_parents(candidate)
        # Matching blocks are only derived for versions stored as a
        # line-delta against a parent; otherwise reannotate gets None.
        blocks = None
        if len(parents) != 0 and \
                knit._index.get_method(candidate) == 'line-delta':
            _parent, _sha1, _noeol, delta = knit.get_delta(candidate)
            blocks = KnitContent.get_line_delta_blocks(
                delta, fulltext[parents[0]], fulltext[candidate])
        parent_annotations = [annotations[p] for p in parents]
        annotations[candidate] = list(annotate.reannotate(
            parent_annotations, fulltext[candidate], candidate, blocks))
    return iter(annotations[revision_id])

2401

2402

2403

try:

2404

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2405

except ImportError:

2406

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »