~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2007-09-20 02:40:52 UTC
mfrom: (2835.1.1 ianc-integration)
Revision ID: pqm@pqm.ubuntu.com-20070920024052-y2l7r5o00zrpnr73

No longer propagate index differences automatically (Robert Collins)

files added:
COPYING.txt

bzr.ico

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/api.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/cache_utf8.py

bzrlib/cmd_version_info.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/dirstate.py

bzrlib/email_message.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/multiparent.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/revisiontree.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/strace.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/timestamp.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/workingtree_4.py

bzrlib/xml6.py

bzrlib/xml7.py

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/last-modified.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/centralized_workflow.txt

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/server.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/version_info.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

man1

profile_imports.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

tools/win32/survey.txt

files removed:
NEWS.developers

build-api

bzrlib/tests/test_doc_generate.py

bzrlib/util/configobj/validate.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/README.1st

patience-test.py

files renamed:
bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

bzrlib/graph.py => bzrlib/deprecated_graph.py

bzrlib/tests/test_revprops.py => bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

bzrlib/win32console.py => bzrlib/win32utils.py

HACKING => doc/developers/HACKING.txt

doc/configuration.txt => doc/en/user-guide/configuration.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/tutorial.txt => doc/en/user-guide/tutorial.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

files modified:
.bzrignore

BRANCH.TODO

INSTALL

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj/configobj.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

generate_docs.py

setup.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

from zlib import Z_DEFAULT_COMPRESSION

import bzrlib

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import *

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

pack,

trace,

)

""")

from bzrlib import (

cache_utf8,

debug,

diff,

errors,

osutils,

patiencediff,

progress,

merge,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitDataStreamIncompatible,

KnitHeaderError,

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_strings,

108

)

109

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

110

from bzrlib.tsort import topo_sort

111

import bzrlib.ui

112

import bzrlib.weave

113

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

114

115

116

# TODO: Split out code specific to this format into an associated object.

118

# TODO: Can we put in some kind of value to check that the index and data

119

# files belong together?

120

# TODO: accomodate binaries, perhaps by storing a byte count

121

# TODO: accommodate binaries, perhaps by storing a byte count

122

123

# TODO: function to check whole file

124

104

133

class KnitContent(object):

105

134

"""Content of a knit version to which deltas can be applied."""

106

135

107

def __init__(self, lines):

108

self._lines = lines

109

110

def annotate_iter(self):

111

"""Yield tuples of (origin, text) for each content line."""

112

for origin, text in self._lines:

113

yield origin, text

114

115

136

def annotate(self):

116

137

"""Return a list of (origin, text) tuples."""

117

138

return list(self.annotate_iter())

118

139

119

140

def line_delta_iter(self, new_lines):

120

141

"""Generate line-based delta from this content to new_lines."""

121

new_texts = [text for origin, text in new_lines._lines]

122

old_texts = [text for origin, text in self._lines]

123

s = SequenceMatcher(None, old_texts, new_texts)

124

for op in s.get_opcodes():

125

if op[0] == 'equal':

142

new_texts = new_lines.text()

143

old_texts = self.text()

144

s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)

145

for tag, i1, i2, j1, j2 in s.get_opcodes():

146

if tag == 'equal':

126

147

continue

127

# ofrom oto length data

128

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

148

# ofrom, oto, length, data

149

yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

129

150

130

151

def line_delta(self, new_lines):

131

152

return list(self.line_delta_iter(new_lines))

132

153

154

@staticmethod

155

def get_line_delta_blocks(knit_delta, source, target):

156

"""Extract SequenceMatcher.get_matching_blocks() from a knit delta"""

157

target_len = len(target)

158

s_pos = 0

159

t_pos = 0

160

for s_begin, s_end, t_len, new_text in knit_delta:

161

true_n = s_begin - s_pos

162

n = true_n

163

if n > 0:

164

# knit deltas do not provide reliable info about whether the

165

# last line of a file matches, due to eol handling.

166

if source[s_pos + n -1] != target[t_pos + n -1]:

167

n-=1

168

if n > 0:

169

yield s_pos, t_pos, n

170

t_pos += t_len + true_n

171

s_pos = s_end

172

n = target_len - t_pos

173

if n > 0:

174

if source[s_pos + n -1] != target[t_pos + n -1]:

175

n-=1

176

if n > 0:

177

yield s_pos, t_pos, n

178

yield s_pos + (target_len - t_pos), target_len, 0

179

180

181

class AnnotatedKnitContent(KnitContent):

182

"""Annotated content."""

183

184

def __init__(self, lines):

185

self._lines = lines

186

187

def annotate_iter(self):

188

"""Yield tuples of (origin, text) for each content line."""

189

return iter(self._lines)

190

191

def strip_last_line_newline(self):

192

line = self._lines[-1][1].rstrip('\n')

193

self._lines[-1] = (self._lines[-1][0], line)

194

133

195

def text(self):

134

196

return [text for origin, text in self._lines]

135

197

136

137

class _KnitFactory(object):

138

"""Base factory for creating content objects."""

139

140

def make(self, lines, version):

141

num_lines = len(lines)

142

return KnitContent(zip([version] * num_lines, lines))

143

144

145

class KnitAnnotateFactory(_KnitFactory):

198

def copy(self):

199

return AnnotatedKnitContent(self._lines[:])

200

201

202

class PlainKnitContent(KnitContent):

203

"""Unannotated content.

204

205

When annotate[_iter] is called on this content, the same version is reported

206

for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent

207

objects.

208

"""

209

210

def __init__(self, lines, version_id):

211

self._lines = lines

212

self._version_id = version_id

213

214

def annotate_iter(self):

215

"""Yield tuples of (origin, text) for each content line."""

216

for line in self._lines:

217

yield self._version_id, line

218

219

def copy(self):

220

return PlainKnitContent(self._lines[:], self._version_id)

221

222

def strip_last_line_newline(self):

223

self._lines[-1] = self._lines[-1].rstrip('\n')

224

225

def text(self):

226

return self._lines

227

228

229

class KnitAnnotateFactory(object):
    """Factory for creating annotated Content objects.

    Serialised annotated records hold one ``origin text`` pair per line;
    the origin is everything before the first space on the line.
    """

    annotated = True

    def make(self, lines, version_id):
        """Build annotated content attributing every line to version_id."""
        num_lines = len(lines)
        return AnnotatedKnitContent(zip([version_id] * num_lines, lines))

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation.

        Every fulltext line is split at its first space into an
        ``(origin, text)`` tuple and the tuples are wrapped in an
        AnnotatedKnitContent.

        :param content: Iterable of serialised fulltext lines.
        :param version_id: Not used here; origins come from the lines.
        """
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return AnnotatedKnitContent(lines)

    def parse_line_delta_iter(self, lines, version_id=None):
        """Return an iterator over the hunks built by parse_line_delta.

        :param version_id: Forwarded to parse_line_delta, which requires the
            argument.  Defaulted so one-argument callers keep working.
        """
        return iter(self.parse_line_delta(lines, version_id))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        A serialised delta is a sequence of hunks.  Each hunk is a header
        line ``start,end,count`` followed by ``count`` content lines of the
        form ``origin text``.

        :param lines: Iterable of serialised delta lines.
        :param version_id: Not used here; origins come from the lines.
        :return: A list of ``(start, end, count, contents)`` tuples, where
            contents is a list of ``count`` ``(origin, text)`` tuples.
        """
        result = []
        lines = iter(lines)
        # The header loop and the content reads share this one iterator, so
        # after a hunk's content is consumed the loop resumes at the next
        # header.
        next_line = lines.next
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next_line().split(' ', 1))
                        for i in xrange(count)]
            result.append((start, end, count, contents))
        return result

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines, with their origin prefix removed.
        """
        from itertools import islice
        lines = iter(lines)
        for header in lines:
            count = int(header.split(',')[2])
            # islice stops quietly on a truncated delta, so no
            # StopIteration can escape from inside this generator.
            for line in islice(lines, count):
                origin, text = line.split(' ', 1)
                yield text

    def lower_fulltext(self, content):
        """Convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """Convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)
        return out

    def annotate_iter(self, knit, version_id):
        """Return the annotate_iter() of knit's content for version_id."""
        content = knit._get_content(version_id)
        return content.annotate_iter()

327

328

329

class KnitPlainFactory(object):

214

330

"""Factory for creating plain Content objects."""

215

331

216

332

annotated = False

217

333

218

def parse_fulltext(self, content, version):

334

def make(self, lines, version_id):
    """Wrap lines in a PlainKnitContent built with version_id."""
    return PlainKnitContent(lines, version_id)

336

337

def parse_fulltext(self, content, version_id):
    """Parse an unannotated fulltext.

    The lines carry no annotations, so this simply hands ``content`` and
    ``version_id`` to ``self.make`` and returns what it builds.
    """
    return self.make(content, version_id)

225

344

226

def parse_line_delta_iter(self, lines, version_id):
    """Yield ``(start, end, count, content_lines)`` for each hunk.

    :param lines: Indexable sequence of serialised delta lines: a
        ``start,end,count`` header followed by ``count`` content lines,
        repeated.  The sequence is only read, never modified.
    :param version_id: Not used by this method.
    """
    cur = 0
    num_lines = len(lines)
    while cur < num_lines:
        header = lines[cur]
        cur += 1
        start, end, c = [int(n) for n in header.split(',')]
        yield start, end, c, lines[cur:cur+c]
        # skip over the content lines that were just yielded
        cur += c

354

355

def parse_line_delta(self, lines, version_id):
    """Return the hunks of parse_line_delta_iter as a list."""
    return list(self.parse_line_delta_iter(lines, version_id))

357

358

def get_fulltext_content(self, lines):
    """Extract just the content lines from a fulltext.

    Plain fulltexts carry no annotations, so this is simply an iterator
    over ``lines``.
    """
    return iter(lines)

361

362

def get_linedelta_content(self, lines):
    """Extract just the content from a line delta.

    This doesn't return all of the extra information stored in a delta.
    Only the actual content lines.

    :param lines: Iterable of serialised delta lines: a ``start,end,count``
        header followed by ``count`` content lines, repeated.
    """
    from itertools import islice
    lines = iter(lines)
    for header in lines:
        count = int(header.split(',')[2])
        # islice stops quietly on a truncated delta, so no StopIteration
        # can escape from inside this generator.
        for line in islice(lines, count):
            yield line

375

236

376

def lower_fulltext(self, content):
    """Convert a fulltext content record into a serializable form.

    Plain content needs no annotation prefix, so the result is whatever
    ``content.text()`` returns.
    """
    return content.text()

238

378

240

380

out = []

241

381

for start, end, c, lines in delta:

242

382

out.append('%d,%d,%d\n' % (start, end, c))

243

out.extend([text for origin, text in lines])

383

out.extend(lines)

244

384

return out

245

385

386

def annotate_iter(self, knit, version_id):
    """Return annotate_knit(knit, version_id).

    Plain records store no annotations, so the work is handed to the
    module-level annotate_knit helper.
    """
    return annotate_knit(knit, version_id)

388

246

389

247

390

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: Transport the knit is created on.
    :param relpath: Path of the knit relative to transport.
    """
    # KnitVersionedFile takes (relpath, transport, file_mode, access_mode,
    # factory, ...).  Passing the arguments positionally in the order
    # (transport, relpath, 'w', KnitPlainFactory) put the factory class in
    # access_mode and failed the constructor's mode assertion.  The factory
    # must also be an instance, because the knit calls its methods as bound
    # methods.  create=True is required because without it the constructor
    # only opens an existing knit.
    KnitVersionedFile(relpath, transport, access_mode='w',
                      factory=KnitPlainFactory(), create=True)

251

393

252

394

253

395

class KnitVersionedFile(VersionedFile):

265

407

stored and retrieved.

266

408

"""

267

409

268

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None, index=None, access_method=None):
    """Construct a knit at location specified by relpath.

    :param access_mode: 'r' or 'w'; None is treated as 'w'.
    :param factory: Content factory; a KnitAnnotateFactory is used when
        none is given.
    :param basis_knit: Deprecated; passing it only emits a
        DeprecationWarning.
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param index: An index to use for the knit.
    :param access_method: Data access object to use instead of building a
        _KnitAccess for ``relpath + DATA_SUFFIX``.
    """
    if deprecated_passed(basis_knit):
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
             " parameter is deprecated as of bzr 0.9.",
             DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    if index is None:
        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
            dir_mode=dir_mode)
    else:
        self._index = index
    # len(self) is evaluated below, so the index has to be in place before
    # the data access object is set up.
    if access_method is None:
        _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
            ((create and not len(self)) and delay_create), create_parent_dir)
    else:
        _access = access_method
    if create and not len(self) and not delay_create:
        _access.create()
    self._data = _KnitData(_access)

292

455

293

456

def __repr__(self):
    """Return ``ClassName(absolute path of the knit on its transport)``."""
    return '%s(%s)' % (self.__class__.__name__,
                       self.transport.abspath(self.filename))

296

459

297

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

298

"""See VersionedFile._add_delta()."""

299

self._check_add(version_id, []) # should we check the lines ?

300

self._check_versions_present(parents)

301

present_parents = []

302

ghosts = []

303

parent_texts = {}

304

for parent in parents:

305

if not self.has_version(parent):

306

ghosts.append(parent)

307

else:

308

present_parents.append(parent)

309

310

if delta_parent is None:

311

# reconstitute as full text.

312

assert len(delta) == 1 or len(delta) == 0

313

if len(delta):

314

assert delta[0][0] == 0

315

assert delta[0][1] == 0, delta[0][1]

316

return super(KnitVersionedFile, self)._add_delta(version_id,

317

parents,

318

delta_parent,

319

sha1,

320

noeol,

321

delta)

322

323

digest = sha1

324

325

options = []

326

if noeol:

327

options.append('no-eol')

328

329

if delta_parent is not None:

330

# determine the current delta chain length.

331

# To speed the extract of texts the delta chain is limited

332

# to a fixed number of deltas. This should minimize both

333

# I/O and the time spend applying deltas.

334

count = 0

335

delta_parents = [delta_parent]

336

while count < 25:

337

parent = delta_parents[0]

338

method = self._index.get_method(parent)

339

if method == 'fulltext':

340

break

341

delta_parents = self._index.get_parents(parent)

342

count = count + 1

343

if method == 'line-delta':

344

# did not find a fulltext in the delta limit.

345

# just do a normal insertion.

346

return super(KnitVersionedFile, self)._add_delta(version_id,

347

parents,

348

delta_parent,

349

sha1,

350

noeol,

351

delta)

352

353

options.append('line-delta')

354

store_lines = self.factory.lower_line_delta(delta)

355

356

where, size = self._data.add_record(version_id, digest, store_lines)

357

self._index.add_version(version_id, options, where, size, parents)

460

def _check_should_delta(self, first_parents):
    """Iterate back through the parent listing, looking for a fulltext.

    This is used when we want to decide whether to add a delta or a new
    fulltext. It follows the first parent of each version for at most
    _max_delta_chain steps. When it finds a fulltext, it compares that
    fulltext's size with the total size of the deltas passed on the way.

    :param first_parents: Parent list of the version about to be added;
        only element 0 is followed.
    :return: True if we should create a new delta, False if we should use a
        full text (no fulltext within the chain limit, or the deltas are
        already at least as large as the fulltext).
    """
    delta_size = 0
    fulltext_size = None
    delta_parents = first_parents
    # range rather than xrange: the bound is the small _max_delta_chain.
    for _count in range(self._max_delta_chain):
        parent = delta_parents[0]
        method = self._index.get_method(parent)
        _index, _pos, size = self._index.get_position(parent)
        if method == 'fulltext':
            fulltext_size = size
            break
        delta_size += size
        delta_parents = self._index.get_parents(parent)
    else:
        # We couldn't find a fulltext, so we must create a new one
        return False

    return fulltext_size > delta_size

358

488

359

489

def _add_raw_records(self, records, data):
    """Add all the records 'records' with data pre-joined in 'data'.

    :param records: A list of tuples(version_id, options, parents, size).
    :param data: The data for the records, concatenated in the same order
        as ``records``; record N occupies ``size`` bytes starting at the
        sum of the preceding records' sizes.
    """
    # write all the data
    raw_record_sizes = [record[3] for record in records]
    positions = self._data.add_raw_records(raw_record_sizes, data)
    offset = 0
    index_entries = []
    for (version_id, options, parents, size), access_memo in zip(
        records, positions):
        index_entries.append((version_id, options, access_memo, parents))
        if self._data._do_cache:
            # keep this record's slice of the raw data in the data cache
            self._data._cache[version_id] = data[offset:offset+size]
        offset += size
    self._index.add_versions(index_entries)

374

509

510

def enable_cache(self):
    """Start caching data for this knit.

    Delegates to ``self._data.enable_cache()``.
    """
    self._data.enable_cache()

513

375

514

def clear_cache(self):
    """Clear the data cache only.

    Delegates to ``self._data.clear_cache()``; the index is not touched.
    """
    self._data.clear_cache()

380

519

"""See VersionedFile.copy_to()."""

381

520

# copy the current index to a temp index to avoid racing with local

382

521

# writes

383

transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename),)

522

transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',

523

self.transport.get(self._index._filename))

384

524

# copy the data file

385

transport.put(name + DATA_SUFFIX, self._data._open_file())

386

# rename the copied index into place

387

transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

525

f = self._data._open_file()

526

try:

527

transport.put_file(name + DATA_SUFFIX, f)

528

finally:

529

f.close()

530

# move the copied index into place

531

transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

388

532

389

533

def create_empty(self, name, transport, mode=None):
    """Create a new, empty knit that uses this knit's factory and delta setting.

    :param name: Relative path of the new knit on transport.
    :param transport: Transport to create the knit on.
    :param mode: Currently ignored.
        NOTE(review): possibly meant to be passed on as file_mode -
        confirm against the VersionedFile.create_empty contract.
    """
    return KnitVersionedFile(name, transport, factory=self.factory,
                             delta=self.delta, create=True)

391

536

392

def _fix_parents(self, version, new_parents):

393

"""Fix the parents list for version.

537

def get_data_stream(self, required_versions):
    """Get a data stream for the specified versions.

    Versions may be returned in any order, not necessarily the order
    specified.

    :param required_versions: The exact set of versions to be extracted.
        Unlike some other knit methods, this is not used to generate a
        transitive closure, rather it is used precisely as given.

    :returns: format_signature, list of (version, options, length, parents),
        reader_callable.
    :raises RevisionNotPresent: if a requested version is not in this knit.
    """
    required_versions = set([osutils.safe_revision_id(v) for v in
        required_versions])
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    for version_id in required_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # Pick the desired versions out of the index in oldest-to-newest order
    version_list = [version_id for version_id in self.versions()
                    if version_id in required_versions]

    # create the list of version information for the result
    copy_queue_records = []
    result_version_list = []
    for version_id in version_list:
        options = self._index.get_options(version_id)
        parents = self._index.get_parents_with_ghosts(version_id)
        index_memo = self._index.get_position(version_id)
        copy_queue_records.append((version_id, index_memo))
        _none, _data_pos, data_size = index_memo
        # version, options, length, parents
        result_version_list.append((version_id, options, data_size,
            parents))

    # Read the compressed record data.
    # XXX:
    # From here down to the return should really be logic in the returned
    # callable -- in a class that adapts read_records_iter_raw to read
    # requests.
    raw_datum = []
    for (version_id, raw_data), \
        (version_id2, options, _, parents) in \
        izip(self._data.read_records_iter_raw(copy_queue_records),
             result_version_list):
        assert version_id == version_id2, 'logic error, inconsistent results'
        raw_datum.append(raw_data)
    pseudo_file = StringIO(''.join(raw_datum))
    def read(length):
        # None means "everything that is left"
        if length is None:
            return pseudo_file.read()
        else:
            return pseudo_file.read(length)
    return (self.get_format_signature(), result_version_list, read)

597

598

def _extract_blocks(self, version_id, source, target):
    """Return matching blocks derived from version_id's stored line delta.

    :return: None when the version is not stored as a 'line-delta';
        otherwise the result of KnitContent.get_line_delta_blocks for the
        version's delta and the given source and target.
    """
    if self._index.get_method(version_id) != 'line-delta':
        return None
    parent, sha1, noeol, delta = self.get_delta(version_id)
    return KnitContent.get_line_delta_blocks(delta, source, target)

407

603

408

604

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: ``(parent, sha1, noeol, delta)``.  parent is the first parent
        of version_id, or None if it has no parents.  noeol is True when
        the index options for the version contain 'no-eol'.  For a version
        stored as a fulltext the delta is computed against the parent's
        text (or against an empty text when there is no parent); otherwise
        the stored line delta is parsed and returned.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    parents = self.get_parents(version_id)
    if len(parents):
        parent = parents[0]
    else:
        parent = None
    index_memo = self._index.get_position(version_id)
    data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' == self._index.get_method(version_id):
        new_content = self.factory.parse_fulltext(data, version_id)
        if parent is not None:
            reference_content = self._get_content(parent)
            old_texts = reference_content.text()
        else:
            old_texts = []
        new_texts = new_content.text()
        delta_seq = patiencediff.PatienceSequenceMatcher(None, old_texts,
                                                         new_texts)
        return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
    else:
        delta = self.factory.parse_line_delta(data, version_id)
        return parent, sha1, noeol, delta

630

631

def get_format_signature(self):
    """See VersionedFile.get_format_signature().

    :return: "knit-annotated" when the factory is annotated, otherwise
        "knit-plain".
    """
    if self.factory.annotated:
        annotated_part = "annotated"
    else:
        annotated_part = "plain"
    return "knit-%s" % (annotated_part,)

435

638

436

639

def get_graph_with_ghosts(self):

437

640

"""See VersionedFile.get_graph_with_ghosts()."""

439

642

return dict(graph_items)

440

643

441

644

def get_sha1(self, version_id):
    """Return the digest get_sha1s reports for the single version_id."""
    return self.get_sha1s([version_id])[0]

646

647

def get_sha1s(self, version_ids):
    """Return the recorded digest of each version, in the order given.

    :param version_ids: Iterable of version ids; each is passed through
        osutils.safe_revision_id first.
    """
    version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    record_map = self._get_record_map(version_ids)
    # record entry 2 is the 'digest'.
    return [record_map[v][2] for v in version_ids]

445

653

446

654

@staticmethod

447

655

def get_suffixes():

450

658

451

659

def has_ghost(self, version_id):

452

660

"""True if there is a ghost reference in the file to version_id."""

661

version_id = osutils.safe_revision_id(version_id)

453

662

# maybe we have it

454

663

if self.has_version(version_id):

455

664

return False

462

671

return True

463

672

return False

464

673

674

def insert_data_stream(self, (format, data_list, reader_callable)):

675

"""Insert knit records from a data stream into this knit.

676

677

If a version in the stream is already present in this knit, it will not

678

be inserted a second time. It will be checked for consistency with the

679

stored version however, and may cause a KnitCorrupt error to be raised

680

if the data in the stream disagrees with the already stored data.

681

682

:seealso: get_data_stream

683

"""

684

if format != self.get_format_signature():

685

trace.mutter('incompatible format signature inserting to %r', self)

686

raise KnitDataStreamIncompatible(

687

format, self.get_format_signature())

688

689

for version_id, options, length, parents in data_list:

690

if self.has_version(version_id):

691

# First check: the list of parents.

692

my_parents = self.get_parents_with_ghosts(version_id)

693

if my_parents != parents:

694

# XXX: KnitCorrupt is not quite the right exception here.

695

raise KnitCorrupt(

696

self.filename,

697

'parents list %r from data stream does not match '

698

'already recorded parents %r for %s'

699

% (parents, my_parents, version_id))

700

701

# Also check the SHA-1 of the fulltext this content will

702

# produce.

703

raw_data = reader_callable(length)

704

my_fulltext_sha1 = self.get_sha1(version_id)

705

df, rec = self._data._parse_record_header(version_id, raw_data)

706

stream_fulltext_sha1 = rec[3]

707

if my_fulltext_sha1 != stream_fulltext_sha1:

708

# Actually, we don't know if it's this knit that's corrupt,

709

# or the data stream we're trying to insert.

710

raise KnitCorrupt(

711

self.filename, 'sha-1 does not match %s' % version_id)

712

else:

713

self._add_raw_records(

714

[(version_id, options, parents, length)],

715

reader_callable(length))

716

465

717

def versions(self):
    """See VersionedFile.versions.

    Returns ``self._index.get_versions()``.  When the 'evil' debug flag is
    set, the call site is logged first.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "versions scales with size of history")
    return self._index.get_versions()

468

722

469

723

def has_version(self, version_id):
    """See VersionedFile.has_version.

    version_id is passed through osutils.safe_revision_id before the index
    is consulted.  When the 'evil' debug flag is set, the call site is
    logged first.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "has_version is a LBYL scenario")
    version_id = osutils.safe_revision_id(version_id)
    return self._index.has_version(version_id)

472

729

473

730

__contains__ = has_version

474

731

475

732

def _merge_annotations(self, content, parents, parent_texts=None,
                       delta=None, annotated=None,
                       left_matching_blocks=None):
    """Merge annotations for content.

    This is done by comparing the text of content with the text of each
    parent and copying the parent's (origin, text) pairs over every
    matching run of lines.

    :param content: Content object for the new text; its _lines are
        updated in place when annotated is true.
    :param parents: Parent version ids.
    :param parent_texts: Optional mapping of version id to cached content,
        passed on to _get_content.
    :param delta: If true, return a line delta against parents[0].
    :param annotated: If true, merge annotations from every parent.
    :param left_matching_blocks: Optional precomputed matching blocks
        between parents[0] and content; when given they are used instead
        of diffing against parents[0].
    :return: The result of _make_line_delta when delta is true, else None.
    """
    if parent_texts is None:
        parent_texts = {}
    if left_matching_blocks is not None:
        delta_seq = diff._PrematchedMatcher(left_matching_blocks)
    else:
        delta_seq = None
    if annotated:
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                seq = delta_seq
            else:
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                if n == 0:
                    continue
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                # parent.
                content._lines[j:j+n] = merge_content._lines[i:i+n]
    if delta:
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

503

765

504

766

def _make_line_delta(self, delta_seq, new_content):

510

772

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

511

773

return diff_hunks

512

774

513

def _get_components(self, version_id):

514

"""Return a list of (version_id, method, data) tuples that

515

makes up version specified by version_id of the knit.

516

517

The components should be applied in the order of the returned

518

list.

519

520

The basis knit will be used to the largest extent possible

521

since it is assumed that accesses to it is faster.

775

def _get_components_positions(self, version_ids):
    """Produce a map of position data for the components of versions.

    This data is intended to be used for retrieving the knit records.

    A dict of version_id to (method, index_memo, next) is returned.
    method is the way referenced data should be applied.
    index_memo is whatever the index's get_position returns for the
    version.
    next is the build-parent of the version, or None for fulltexts.

    Each requested version's chain of build-parents is followed until a
    fulltext, or a version that is already in the map, is reached.
    """
    component_data = {}
    for version_id in version_ids:
        cursor = version_id

        while cursor is not None and cursor not in component_data:
            method = self._index.get_method(cursor)
            if method == 'fulltext':
                build_parent = None
            else:
                build_parent = self.get_parents(cursor)[0]
            index_memo = self._index.get_position(cursor)
            component_data[cursor] = (method, index_memo, build_parent)
            cursor = build_parent
    return component_data

801

576

802

def _get_content(self, version_id, parent_texts=None):
    """Return a content object that makes up the specified version.

    :param parent_texts: Optional mapping of version id to already built
        content.  A hit is returned directly, after checking that the
        version really is in this knit.
    :raises RevisionNotPresent: if a cached entry names a version this knit
        does not have.
    """
    if parent_texts is None:
        parent_texts = {}
    cached_version = parent_texts.get(version_id, None)
    if cached_version is not None:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
        return cached_version
    text_map, contents_map = self._get_content_maps([version_id])
    return contents_map[version_id]

610

813

611

814

def _check_versions_present(self, version_ids):
    """Check that all specified versions are present.

    Delegates to ``self._index.check_versions_present``.
    """
    self._index.check_versions_present(version_ids)

619

817

620

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
                           nostore_sha, random_id, check_content):
    """See VersionedFile.add_lines_with_ghosts().

    Unlike _add_lines, the parents are not required to be present in the
    knit, and no precomputed matching blocks are passed to _add.
    """
    self._check_add(version_id, lines, random_id, check_content)
    return self._add(version_id, lines, parents, self.delta,
                     parent_texts, None, nostore_sha)

624

824

625

def _add_lines(self, version_id, parents, lines, parent_texts,
               left_matching_blocks, nostore_sha, random_id, check_content):
    """See VersionedFile.add_lines.

    All parents must already be present in the knit.  _add is given a copy
    of lines.
    """
    self._check_add(version_id, lines, random_id, check_content)
    self._check_versions_present(parents)
    return self._add(version_id, lines[:], parents, self.delta,
                     parent_texts, left_matching_blocks, nostore_sha)

630

832

631

def _check_add(self, version_id, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param random_id: If true, skip the check that version_id is not
        already present.
    :param check_content: If true, also run the line content checks on
        lines.
    :raises InvalidRevisionId: if version_id contains whitespace.
    :raises RevisionAlreadyPresent: if version_id is already in the knit
        and random_id is false.
    """
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    # Technically this could be avoided if we are happy to allow duplicate
    # id insertion when other things than bzr core insert texts, but it
    # seems useful for folk using the knit api directly to have some safety
    # blanket that we can disable.
    if not random_id and self.has_version(version_id):
        raise RevisionAlreadyPresent(version_id, self.filename)
    if check_content:
        self._check_lines_not_unicode(lines)
        self._check_lines_are_lines(lines)

641

847

642

def _add(self, version_id, lines, parents, delta, parent_texts):

848

def _add(self, version_id, lines, parents, delta, parent_texts,

849

left_matching_blocks, nostore_sha):

643

850

"""Add a set of lines on top of version specified by parents.

644

851

645

852

If delta is true, compress the text as a line-delta against

660

867

# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)

661

868

662

869

present_parents = []

663

ghosts = []

664

870

if parent_texts is None:

665

871

parent_texts = {}

666

872

for parent in parents:

667

if not self.has_version(parent):

668

ghosts.append(parent)

669

else:

873

if self.has_version(parent):

670

874

present_parents.append(parent)

671

875

672

if delta and not len(present_parents):

876

# can only compress against the left most present parent.

877

if (delta and

878

(len(present_parents) == 0 or

879

present_parents[0] != parents[0])):

673

880

delta = False

674

881

675

882

digest = sha_strings(lines)

883

if nostore_sha == digest:

884

raise errors.ExistingContent

885

text_length = sum(map(len, lines))

676

886

options = []

677

887

if lines:

678

888

if lines[-1][-1] != '\n':

889

# copy the contents of lines.

890

lines = lines[:]

679

891

options.append('no-eol')

680

892

lines[-1] = lines[-1] + '\n'

681

893

682

if len(present_parents) and delta:

894

if delta:

683

895

# To speed the extract of texts the delta chain is limited

684

896

# to a fixed number of deltas. This should minimize both

685

897

# I/O and the time spend applying deltas.

686

count = 0

687

delta_parents = present_parents

688

while count < 25:

689

parent = delta_parents[0]

690

method = self._index.get_method(parent)

691

if method == 'fulltext':

692

break

693

delta_parents = self._index.get_parents(parent)

694

count = count + 1

695

if method == 'line-delta':

696

delta = False

898

delta = self._check_should_delta(present_parents)

697

899

698

lines = self.factory.make(lines, version_id)

900

assert isinstance(version_id, str)

901

content = self.factory.make(lines, version_id)

699

902

if delta or (self.factory.annotated and len(present_parents) > 0):

700

# Merge annotations from parent texts if so is needed.

701

delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,

702

delta, self.factory.annotated)

903

# Merge annotations from parent texts if needed.

904

delta_hunks = self._merge_annotations(content, present_parents,

905

parent_texts, delta, self.factory.annotated,

906

left_matching_blocks)

703

907

704

908

if delta:

705

909

options.append('line-delta')

706

910

store_lines = self.factory.lower_line_delta(delta_hunks)

707

911

else:

708

912

options.append('fulltext')

709

store_lines = self.factory.lower_fulltext(lines)

913

store_lines = self.factory.lower_fulltext(content)

710

914

711

where, size = self._data.add_record(version_id, digest, store_lines)

712

self._index.add_version(version_id, options, where, size, parents)

713

return lines

915

access_memo = self._data.add_record(version_id, digest, store_lines)

916

self._index.add_version(version_id, options, access_memo, parents)

917

return digest, text_length, content

714

918

715

919

def check(self, progress_bar=None):

716

920

"""See VersionedFile.check()."""

717

921

718

922

def _clone_text(self, new_version_id, old_version_id, parents):

719

923

"""See VersionedFile.clone_text()."""

720

# FIXME RBC 20060228 make fast by only inserting an index with null delta.

924

# FIXME RBC 20060228 make fast by only inserting an index with null

925

# delta.

721

926

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

722

927

723

928

def get_lines(self, version_id):

724

929

"""See VersionedFile.get_lines()."""

725

return self._get_content(version_id).text()

726

727

def iter_lines_added_or_present_in_versions(self, version_ids=None):

930

return self.get_line_list([version_id])[0]

931

932

def _get_record_map(self, version_ids):

933

"""Produce a dictionary of knit records.

934

935

The keys are version_ids, the values are tuples of (method, content,

936

digest, next).

937

method is the way the content should be applied.

938

content is a KnitContent object.

939

digest is the SHA1 digest of this version id after all steps are done

940

next is the build-parent of the version, i.e. the leftmost ancestor.

941

If the method is fulltext, next will be None.

942

"""

943

position_map = self._get_components_positions(version_ids)

944

# c = component_id, m = method, i_m = index_memo, n = next

945

records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]

946

record_map = {}

947

for component_id, content, digest in \

948

self._data.read_records_iter(records):

949

method, index_memo, next = position_map[component_id]

950

record_map[component_id] = method, content, digest, next

951

952

return record_map

953

954

def get_text(self, version_id):

955

"""See VersionedFile.get_text"""

956

return self.get_texts([version_id])[0]

957

958

def get_texts(self, version_ids):

959

return [''.join(l) for l in self.get_line_list(version_ids)]

960

961

def get_line_list(self, version_ids):

962

"""Return the texts of listed versions as a list of strings."""

963

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

964

for version_id in version_ids:

965

self.check_not_reserved_id(version_id)

966

text_map, content_map = self._get_content_maps(version_ids)

967

return [text_map[v] for v in version_ids]

968

969

_get_lf_split_line_list = get_line_list

970

971

def _get_content_maps(self, version_ids):

972

"""Produce maps of text and KnitContents

973

974

:return: (text_map, content_map) where text_map contains the texts for

975

the requested versions and content_map contains the KnitContents.

976

Both dicts take version_ids as their keys.

977

"""

978

for version_id in version_ids:

979

if not self.has_version(version_id):

980

raise RevisionNotPresent(version_id, self.filename)

981

record_map = self._get_record_map(version_ids)

982

983

text_map = {}

984

content_map = {}

985

final_content = {}

986

for version_id in version_ids:

987

components = []

988

cursor = version_id

989

while cursor is not None:

990

method, data, digest, next = record_map[cursor]

991

components.append((cursor, method, data, digest))

992

if cursor in content_map:

993

break

994

cursor = next

995

996

content = None

997

for component_id, method, data, digest in reversed(components):

998

if component_id in content_map:

999

content = content_map[component_id]

1000

else:

1001

if method == 'fulltext':

1002

assert content is None

1003

content = self.factory.parse_fulltext(data, version_id)

1004

elif method == 'line-delta':

1005

delta = self.factory.parse_line_delta(data, version_id)

1006

content = content.copy()

1007

content._lines = self._apply_delta(content._lines,

1008

delta)

1009

content_map[component_id] = content

1010

1011

if 'no-eol' in self._index.get_options(version_id):

1012

content = content.copy()

1013

content.strip_last_line_newline()

1014

final_content[version_id] = content

1015

1016

# digest here is the digest from the last applied component.

1017

text = content.text()

1018

if sha_strings(text) != digest:

1019

raise KnitCorrupt(self.filename,

1020

'sha-1 does not match %s' % version_id)

1021

1022

text_map[version_id] = text

1023

return text_map, final_content

1024

1025

def iter_lines_added_or_present_in_versions(self, version_ids=None,

1026

pb=None):

728

1027

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

729

1028

if version_ids is None:

730

1029

version_ids = self.versions()

731

# we dont care about inclusions, the caller cares.

1030

else:

1031

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

1032

if pb is None:

1033

pb = progress.DummyProgress()

1034

# we don't care about inclusions, the caller cares.

732

1035

# but we need to setup a list of records to visit.

733

1036

# we need version_id, position, length

734

1037

version_id_records = []

735

requested_versions = list(version_ids)

1038

requested_versions = set(version_ids)

736

1039

# filter for available versions

737

1040

for version_id in requested_versions:

738

1041

if not self.has_version(version_id):

739

1042

raise RevisionNotPresent(version_id, self.filename)

740

1043

# get a in-component-order queue:

741

version_ids = []

742

1044

for version_id in self.versions():

743

1045

if version_id in requested_versions:

744

version_ids.append(version_id)

745

data_pos, length = self._index.get_position(version_id)

746

version_id_records.append((version_id, data_pos, length))

1046

index_memo = self._index.get_position(version_id)

1047

version_id_records.append((version_id, index_memo))

747

1048

748

pb = bzrlib.ui.ui_factory.nested_progress_bar()

749

count = 0

750

1049

total = len(version_id_records)

751

try:

752

pb.update('Walking content.', count, total)

753

for version_id, data, sha_value in \

754

self._data.read_records_iter(version_id_records):

755

pb.update('Walking content.', count, total)

756

method = self._index.get_method(version_id)

757

version_idx = self._index.lookup(version_id)

758

assert method in ('fulltext', 'line-delta')

759

if method == 'fulltext':

760

content = self.factory.parse_fulltext(data, version_idx)

761

for line in content.text():

762

yield line

763

else:

764

delta = self.factory.parse_line_delta(data, version_idx)

765

for start, end, count, lines in delta:

766

for origin, line in lines:

767

yield line

768

count +=1

769

pb.update('Walking content.', total, total)

770

pb.finished()

771

except:

772

pb.update('Walking content.', total, total)

773

pb.finished()

774

raise

1050

for version_idx, (version_id, data, sha_value) in \

1051

enumerate(self._data.read_records_iter(version_id_records)):

1052

pb.update('Walking content.', version_idx, total)

1053

method = self._index.get_method(version_id)

1054

1055

assert method in ('fulltext', 'line-delta')

1056

if method == 'fulltext':

1057

line_iterator = self.factory.get_fulltext_content(data)

1058

else:

1059

line_iterator = self.factory.get_linedelta_content(data)

1060

for line in line_iterator:

1061

yield line

1062

1063

pb.update('Walking content.', total, total)

775

1064

1065

def iter_parents(self, version_ids):

1066

"""Iterate through the parents for many version ids.

1067

1068

:param version_ids: An iterable yielding version_ids.

1069

:return: An iterator that yields (version_id, parents). Requested

1070

version_ids not present in the versioned file are simply skipped.

1071

The order is undefined, allowing for different optimisations in

1072

the underlying implementation.

1073

"""

1074

version_ids = [osutils.safe_revision_id(version_id) for

1075

version_id in version_ids]

1076

return self._index.iter_parents(version_ids)

1077

776

1078

def num_versions(self):

777

1079

"""See VersionedFile.num_versions()."""

778

1080

return self._index.num_versions()

781

1083

782

1084

def annotate_iter(self, version_id):

783

1085

"""See VersionedFile.annotate_iter."""

784

content = self._get_content(version_id)

785

for origin, text in content.annotate_iter():

786

yield origin, text

1086

version_id = osutils.safe_revision_id(version_id)

1087

return self.factory.annotate_iter(self, version_id)

787

1088

788

1089

def get_parents(self, version_id):

789

1090

"""See VersionedFile.get_parents."""

790

1091

# perf notes:

791

1092

# optimism counts!

792

1093

# 52554 calls in 1264 872 internal down from 3674

1094

version_id = osutils.safe_revision_id(version_id)

793

1095

try:

794

1096

return self._index.get_parents(version_id)

795

1097

except KeyError:

797

1099

798

1100

def get_parents_with_ghosts(self, version_id):

799

1101

"""See VersionedFile.get_parents."""

1102

version_id = osutils.safe_revision_id(version_id)

800

1103

try:

801

1104

return self._index.get_parents_with_ghosts(version_id)

802

1105

except KeyError:

803

1106

raise RevisionNotPresent(version_id, self.filename)

804

1107

805

def get_ancestry(self, versions):

1108

def get_ancestry(self, versions, topo_sorted=True):

806

1109

"""See VersionedFile.get_ancestry."""

807

1110

if isinstance(versions, basestring):

808

1111

versions = [versions]

809

1112

if not versions:

810

1113

return []

811

self._check_versions_present(versions)

812

return self._index.get_ancestry(versions)

1114

versions = [osutils.safe_revision_id(v) for v in versions]

1115

return self._index.get_ancestry(versions, topo_sorted)

813

1116

814

1117

def get_ancestry_with_ghosts(self, versions):

815

1118

"""See VersionedFile.get_ancestry_with_ghosts."""

817

1120

versions = [versions]

818

1121

if not versions:

819

1122

return []

820

self._check_versions_present(versions)

1123

versions = [osutils.safe_revision_id(v) for v in versions]

821

1124

return self._index.get_ancestry_with_ghosts(versions)

822

1125

823

#@deprecated_method(zero_eight)

824

def walk(self, version_ids):

825

"""See VersionedFile.walk."""

826

# We take the short path here, and extract all relevant texts

827

# and put them in a weave and let that do all the work. Far

828

# from optimal, but is much simpler.

829

# FIXME RB 20060228 this really is inefficient!

830

from bzrlib.weave import Weave

831

832

w = Weave(self.filename)

833

ancestry = self.get_ancestry(version_ids)

834

sorted_graph = topo_sort(self._index.get_graph())

835

version_list = [vid for vid in sorted_graph if vid in ancestry]

836

837

for version_id in version_list:

838

lines = self.get_lines(version_id)

839

w.add_lines(version_id, self.get_parents(version_id), lines)

840

841

for lineno, insert_id, dset, line in w.walk(version_ids):

842

yield lineno, insert_id, dset, line

843

844

1126

def plan_merge(self, ver_a, ver_b):

845

1127

"""See VersionedFile.plan_merge."""

846

ancestors_b = set(self.get_ancestry(ver_b))

847

def status_a(revision, text):

848

if revision in ancestors_b:

849

return 'killed-b', text

850

else:

851

return 'new-a', text

1128

ver_a = osutils.safe_revision_id(ver_a)

1129

ver_b = osutils.safe_revision_id(ver_b)

1130

ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))

852

1131

853

ancestors_a = set(self.get_ancestry(ver_a))

854

def status_b(revision, text):

855

if revision in ancestors_a:

856

return 'killed-a', text

857

else:

858

return 'new-b', text

859

1132

ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))

860

1133

annotated_a = self.annotate(ver_a)

861

1134

annotated_b = self.annotate(ver_b)

862

plain_a = [t for (a, t) in annotated_a]

863

plain_b = [t for (a, t) in annotated_b]

864

blocks = SequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

865

a_cur = 0

866

b_cur = 0

867

for ai, bi, l in blocks:

868

# process all mismatched sections

869

# (last mismatched section is handled because blocks always

870

# includes a 0-length last block)

871

for revision, text in annotated_a[a_cur:ai]:

872

yield status_a(revision, text)

873

for revision, text in annotated_b[b_cur:bi]:

874

yield status_b(revision, text)

875

876

# and now the matched section

877

a_cur = ai + l

878

b_cur = bi + l

879

for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):

880

assert text_a == text_b

881

yield "unchanged", text_a

1135

return merge._plan_annotate_merge(annotated_a, annotated_b,

1136

ancestors_a, ancestors_b)

882

1137

883

1138

884

1139

class _KnitComponentFile(object):

885

1140

"""One of the files used to implement a knit database"""

886

1141

887

def __init__(self, transport, filename, mode, file_mode=None):

1142

def __init__(self, transport, filename, mode, file_mode=None,

1143

create_parent_dir=False, dir_mode=None):

888

1144

self._transport = transport

889

1145

self._filename = filename

890

1146

self._mode = mode

891

self._file_mode=file_mode

1147

self._file_mode = file_mode

1148

self._dir_mode = dir_mode

1149

self._create_parent_dir = create_parent_dir

1150

self._need_to_create = False

892

1151

893

def write_header(self):

894

if self._transport.append(self._filename, StringIO(self.HEADER),

895

mode=self._file_mode):

896

raise KnitCorrupt(self._filename, 'misaligned after writing header')

1152

def _full_path(self):

1153

"""Return the full path to this file."""

1154

return self._transport.base + self._filename

897

1155

898

1156

def check_header(self, fp):

899

1157

line = fp.readline()

1158

if line == '':

1159

# An empty file can actually be treated as though the file doesn't

1160

# exist yet.

1161

raise errors.NoSuchFile(self._full_path())

900

1162

if line != self.HEADER:

901

raise KnitHeaderError(badline=line)

902

903

def commit(self):

904

"""Commit is a nop."""

1163

raise KnitHeaderError(badline=line,

1164

filename=self._transport.abspath(self._filename))

905

1165

906

1166

def __repr__(self):

907

1167

return '%s(%s)' % (self.__class__.__name__, self._filename)

932

1192

933

1193

The index file on disc contains a header, followed by one line per knit

934

1194

record. The same revision can be present in an index file more than once.

935

The first occurence gets assigned a sequence number starting from 0.

1195

The first occurrence gets assigned a sequence number starting from 0.

936

1196

937

1197

The format of a single line is

938

1198

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

949

1209

The ' :' marker is the end of record marker.

950

1210

951

1211

partial writes:

952

when a write is interrupted to the index file, it will result in a line that

953

does not end in ' :'. If the ' :' is not present at the end of a line, or at

954

the end of the file, then the record that is missing it will be ignored by

955

the parser.

1212

when a write is interrupted to the index file, it will result in a line

1213

that does not end in ' :'. If the ' :' is not present at the end of a line,

1214

or at the end of the file, then the record that is missing it will be

1215

ignored by the parser.

956

1216

957

When writing new records to the index file, the data is preceeded by '\n'

1217

When writing new records to the index file, the data is preceded by '\n'

958

1218

to ensure that records always start on new lines even if the last write was

959

1219

interrupted. As a result its normal for the last line in the index to be

960

1220

missing a trailing newline. One can be added with no harmful effects.

967

1227

968

1228

def _cache_version(self, version_id, options, pos, size, parents):

969

1229

"""Cache a version record in the history array and index cache.

970

971

This is inlined into __init__ for performance. KEEP IN SYNC.

1230

1231

This is inlined into _load_data for performance. KEEP IN SYNC.

972

1232

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

973

1233

indexes).

974

1234

"""

979

1239

self._history.append(version_id)

980

1240

else:

981

1241

index = self._cache[version_id][5]

982

self._cache[version_id] = (version_id,

1242

self._cache[version_id] = (version_id,

983

1243

options,

984

1244

pos,

985

1245

size,

986

1246

parents,

987

1247

index)

988

1248

989

def __init__(self, transport, filename, mode, create=False, file_mode=None):

990

_KnitComponentFile.__init__(self, transport, filename, mode, file_mode)

1249

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1250

create_parent_dir=False, delay_create=False, dir_mode=None):

1251

_KnitComponentFile.__init__(self, transport, filename, mode,

1252

file_mode=file_mode,

1253

create_parent_dir=create_parent_dir,

1254

dir_mode=dir_mode)

991

1255

self._cache = {}

992

1256

# position in _history is the 'official' index for a revision

993

1257

# but the values may have come from a newer entry.

994

# so - wc -l of a knit index is != the number of uniqe names

995

# in the weave.

1258

# so - wc -l of a knit index is != the number of unique names

1259

# in the knit.

996

1260

self._history = []

997

pb = bzrlib.ui.ui_factory.nested_progress_bar()

998

1261

try:

999

count = 0

1000

total = 1

1262

fp = self._transport.get(self._filename)

1001

1263

try:

1002

pb.update('read knit index', count, total)

1003

fp = self._transport.get(self._filename)

1004

self.check_header(fp)

1005

# readlines reads the whole file at once:

1006

# bad for transports like http, good for local disk

1007

# we save 60 ms doing this one change (

1008

# from calling readline each time to calling

1009

# readlines once.

1010

# probably what we want for nice behaviour on

1011

# http is a incremental readlines that yields, or

1012

# a check for local vs non local indexes,

1013

for l in fp.readlines():

1014

rec = l.split()

1015

if len(rec) < 5 or rec[-1] != ':':

1016

# corrupt line.

1017

# FIXME: in the future we should determine if its a

1018

# short write - and ignore it

1019

# or a different failure, and raise. RBC 20060407

1020

continue

1021

count += 1

1022

total += 1

1023

#pb.update('read knit index', count, total)

1024

# See self._parse_parents

1025

parents = []

1026

for value in rec[4:-1]:

1027

if '.' == value[0]:

1028

# uncompressed reference

1029

parents.append(value[1:])

1030

else:

1031

# this is 15/4000ms faster than isinstance,

1032

# (in lsprof)

1033

# this function is called thousands of times a

1034

# second so small variations add up.

1035

assert value.__class__ is str

1036

parents.append(self._history[int(value)])

1037

# end self._parse_parents

1038

# self._cache_version(rec[0],

1039

# rec[1].split(','),

1040

# int(rec[2]),

1041

# int(rec[3]),

1042

# parents)

1043

# --- self._cache_version

1044

# only want the _history index to reference the 1st

1045

# index entry for version_id

1046

version_id = rec[0]

1047

if version_id not in self._cache:

1048

index = len(self._history)

1049

self._history.append(version_id)

1050

else:

1051

index = self._cache[version_id][5]

1052

self._cache[version_id] = (version_id,

1053

rec[1].split(','),

1054

int(rec[2]),

1055

int(rec[3]),

1056

parents,

1057

index)

1058

# --- self._cache_version

1059

except NoSuchFile, e:

1060

if mode != 'w' or not create:

1061

raise

1062

self.write_header()

1063

finally:

1064

pb.update('read knit index', total, total)

1065

pb.finished()

1066

1067

def _parse_parents(self, compressed_parents):

1068

"""convert a list of string parent values into version ids.

1069

1070

ints are looked up in the index.

1071

.FOO values are ghosts and converted in to FOO.

1072

1073

NOTE: the function is retained here for clarity, and for possible

1074

use in partial index reads. However bulk processing now has

1075

it inlined in __init__ for inner-loop optimisation.

1076

"""

1077

result = []

1078

for value in compressed_parents:

1079

if value[-1] == '.':

1080

# uncompressed reference

1081

result.append(value[1:])

1264

# _load_data may raise NoSuchFile if the target knit is

1265

# completely empty.

1266

_load_data(self, fp)

1267

finally:

1268

fp.close()

1269

except NoSuchFile:

1270

if mode != 'w' or not create:

1271

raise

1272

elif delay_create:

1273

self._need_to_create = True

1082

1274

else:

1083

# this is 15/4000ms faster than isinstance,

1084

# this function is called thousands of times a

1085

# second so small variations add up.

1086

assert value.__class__ is str

1087

result.append(self._history[int(value)])

1088

return result

1275

self._transport.put_bytes_non_atomic(

1276

self._filename, self.HEADER, mode=self._file_mode)

1089

1277

1090

1278

def get_graph(self):

1091

graph = []

1092

for version_id, index in self._cache.iteritems():

1093

graph.append((version_id, index[4]))

1094

return graph

1279

"""Return a list of the node:parents lists from this knit index."""

1280

return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

1095

1281

1096

def get_ancestry(self, versions):

1282

def get_ancestry(self, versions, topo_sorted=True):

1097

1283

"""See VersionedFile.get_ancestry."""

1098

1284

# get a graph of all the mentioned versions:

1099

1285

graph = {}

1100

1286

pending = set(versions)

1101

while len(pending):

1287

cache = self._cache

1288

while pending:

1102

1289

version = pending.pop()

1103

parents = self._cache[version][4]

1104

# got the parents ok

1105

1290

# trim ghosts

1106

parents = [parent for parent in parents if parent in self._cache]

1107

for parent in parents:

1108

# if not completed and not a ghost

1109

if parent not in graph:

1110

pending.add(parent)

1291

try:

1292

parents = [p for p in cache[version][4] if p in cache]

1293

except KeyError:

1294

raise RevisionNotPresent(version, self._filename)

1295

# if not completed and not a ghost

1296

pending.update([p for p in parents if p not in graph])

1111

1297

graph[version] = parents

1298

if not topo_sorted:

1299

return graph.keys()

1112

1300

return topo_sort(graph.items())

1113

1301

1114

1302

def get_ancestry_with_ghosts(self, versions):

1115

1303

"""See VersionedFile.get_ancestry_with_ghosts."""

1116

1304

# get a graph of all the mentioned versions:

1305

self.check_versions_present(versions)

1306

cache = self._cache

1117

1307

graph = {}

1118

1308

pending = set(versions)

1119

while len(pending):

1309

while pending:

1120

1310

version = pending.pop()

1121

1311

try:

1122

parents = self._cache[version][4]

1312

parents = cache[version][4]

1123

1313

except KeyError:

1124

1314

# ghost, fake it

1125

1315

graph[version] = []

1126

pass

1127

1316

else:

1128

# got the parents ok

1129

for parent in parents:

1130

if parent not in graph:

1131

pending.add(parent)

1317

# if not completed

1318

pending.update([p for p in parents if p not in graph])

1132

1319

graph[version] = parents

1133

1320

return topo_sort(graph.items())

1134

1321

1322

def iter_parents(self, version_ids):

1323

"""Iterate through the parents for many version ids.

1324

1325

:param version_ids: An iterable yielding version_ids.

1326

:return: An iterator that yields (version_id, parents). Requested

1327

version_ids not present in the versioned file are simply skipped.

1328

The order is undefined, allowing for different optimisations in

1329

the underlying implementation.

1330

"""

1331

for version_id in version_ids:

1332

try:

1333

yield version_id, tuple(self.get_parents(version_id))

1334

except KeyError:

1335

pass

1336

1135

1337

def num_versions(self):

1136

1338

return len(self._history)

1137

1339

1138

1340

__len__ = num_versions

1139

1341

1140

1342

def get_versions(self):

1343

"""Get all the versions in the file. not topologically sorted."""

1141

1344

return self._history

1142

1345

1143

def idx_to_name(self, idx):

1144

return self._history[idx]

1145

1146

def lookup(self, version_id):

1147

assert version_id in self._cache

1148

return self._cache[version_id][5]

1149

1150

1346

def _version_list_to_index(self, versions):

1151

1347

result_list = []

1348

cache = self._cache

1152

1349

for version in versions:

1153

if version in self._cache:

1350

if version in cache:

1154

1351

# -- inlined lookup() --

1155

result_list.append(str(self._cache[version][5]))

1352

result_list.append(str(cache[version][5]))

1156

1353

# -- end lookup () --

1157

1354

else:

1158

result_list.append('.' + version.encode('utf-8'))

1355

result_list.append('.' + version)

1159

1356

return ' '.join(result_list)

1160

1357

1161

def add_version(self, version_id, options, pos, size, parents):

1358

def add_version(self, version_id, options, index_memo, parents):

1162

1359

"""Add a version record to the index."""

1163

self.add_versions(((version_id, options, pos, size, parents),))

1360

self.add_versions(((version_id, options, index_memo, parents),))

1164

1361

1165

1362

def add_versions(self, versions):

1166

1363

"""Add multiple versions to the index.

1169

1366

(version_id, options, pos, size, parents).

1170

1367

"""

1171

1368

lines = []

1172

for version_id, options, pos, size, parents in versions:

1173

line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),

1174

','.join(options),

1175

pos,

1176

size,

1177

self._version_list_to_index(parents))

1178

assert isinstance(line, str), \

1179

'content must be utf-8 encoded: %r' % (line,)

1180

lines.append(line)

1181

self._transport.append(self._filename, StringIO(''.join(lines)))

1182

# cache after writing, so that a failed write leads to missing cache

1183

# entries not extra ones. XXX TODO: RBC 20060502 in the event of a

1184

# failure, reload the index or flush it or some such, to prevent

1185

# writing records that did complete twice.

1186

for version_id, options, pos, size, parents in versions:

1187

self._cache_version(version_id, options, pos, size, parents)

1188

1369

orig_history = self._history[:]

1370

orig_cache = self._cache.copy()

1371

1372

try:

1373

for version_id, options, (index, pos, size), parents in versions:

1374

line = "\n%s %s %s %s %s :" % (version_id,

1375

','.join(options),

1376

pos,

1377

size,

1378

self._version_list_to_index(parents))

1379

assert isinstance(line, str), \

1380

'content must be utf-8 encoded: %r' % (line,)

1381

lines.append(line)

1382

self._cache_version(version_id, options, pos, size, parents)

1383

if not self._need_to_create:

1384

self._transport.append_bytes(self._filename, ''.join(lines))

1385

else:

1386

sio = StringIO()

1387

sio.write(self.HEADER)

1388

sio.writelines(lines)

1389

sio.seek(0)

1390

self._transport.put_file_non_atomic(self._filename, sio,

1391

create_parent_dir=self._create_parent_dir,

1392

mode=self._file_mode,

1393

dir_mode=self._dir_mode)

1394

self._need_to_create = False

1395

except:

1396

# If any problems happen, restore the original values and re-raise

1397

self._history = orig_history

1398

self._cache = orig_cache

1399

raise

1400

1189

1401

def has_version(self, version_id):

1190

1402

"""True if the version is in the index."""

1191

return self._cache.has_key(version_id)

1403

return version_id in self._cache

1192

1404

1193

1405

def get_position(self, version_id):

1194

"""Return data position and size of specified version."""

1195

return (self._cache[version_id][2], \

1196

self._cache[version_id][3])

1406

"""Return details needed to access the version.

1407

1408

.kndx indices do not support split-out data, so return None for the

1409

index field.

1410

1411

:return: a tuple (None, data position, size) to hand to the access

1412

logic to get the record.

1413

"""

1414

entry = self._cache[version_id]

1415

return None, entry[2], entry[3]

1197

1416

1198

1417

def get_method(self, version_id):

1199

1418

"""Return compression method of specified version."""

1201

1420

if 'fulltext' in options:

1202

1421

return 'fulltext'

1203

1422

else:

1204

assert 'line-delta' in options

1423

if 'line-delta' not in options:

1424

raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1205

1425

return 'line-delta'

1206

1426

1207

1427

def get_options(self, version_id):

1428

"""Return a string represention options.

1429

1430

e.g. foo,bar

1431

"""

1208

1432

return self._cache[version_id][1]

1209

1433

1210

1434

def get_parents(self, version_id):

1213

1437

if parent in self._cache]

1214

1438

1215

1439

def get_parents_with_ghosts(self, version_id):

1216

"""Return parents of specified version wth ghosts."""

1440

"""Return parents of specified version with ghosts."""

1217

1441

return self._cache[version_id][4]

1218

1442

1219

1443

def check_versions_present(self, version_ids):

1220

1444

"""Check that all specified versions are present."""

1221

version_ids = set(version_ids)

1222

for version_id in list(version_ids):

1223

if version_id in self._cache:

1224

version_ids.remove(version_id)

1225

if version_ids:

1226

raise RevisionNotPresent(list(version_ids)[0], self.filename)

1227

1228

1229

class _KnitData(_KnitComponentFile):

1230

"""Contents of the knit data file"""

1231

1232

HEADER = "# bzr knit data 8\n"

1233

1234

def __init__(self, transport, filename, mode, create=False, file_mode=None):

1235

_KnitComponentFile.__init__(self, transport, filename, mode)

1236

self._file = None

1445

cache = self._cache

1446

for version_id in version_ids:

1447

if version_id not in cache:

1448

raise RevisionNotPresent(version_id, self._filename)

1449

1450

1451

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex.

    Version ids are mapped to 1-tuple index keys, ``(version_id, )``.  Each
    node value is a string made of one flag character ('N' when the text has
    no trailing end-of-line, ' ' otherwise) followed by "<pos> <size>".  When
    parents are tracked the first reference list of a node holds its parents;
    when deltas are also enabled a second reference list holds the compression
    parent (empty for a fulltext).
    """

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knits parents, if not do not record
            parents.
        :raises KnitCorrupt: if deltas are requested without parent tracking.
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        This is a generator, so the presence check only happens once the
        caller has consumed every entry.

        :param keys: An iterable of index keys, - 1-tuples.
        :param check_present: If True, raise RevisionNotPresent after the
            last entry when some requested key was not found.
        :return: An iterator of (index, key, value, node_refs) tuples.  For a
            parentless index node_refs is always ().
        """
        keys = set(keys)
        found_keys = set()
        # adapt parentless index to the rest of the code.
        pad_refs = not self._parents
        for node in self._graph_index.iter_entries(keys):
            if pad_refs:
                node = (node[0], node[1], node[2], ())
            yield node
            found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
        """Return the set of the given index keys that exist in the index.

        Despite the parameter name, the values are passed straight to
        _get_entries and so must already be index keys (1-tuples).
        """
        return set([
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices.

        Without parents the ancestry of a set of versions is just those
        versions; RevisionNotPresent is raised if any of them is absent.
        """
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry.

        Walks the parent references outwards from versions, one generation
        of lookups per loop.  Keys that are asked for but not found are
        remembered as ghosts and left out of the result.

        :param versions: An iterable of version ids.
        :param topo_sorted: If True the result is passed through topo_sort,
            otherwise it is returned in dictionary order.
        :return: A list of version ids.
        :raises RevisionNotPresent: if one of versions is not in the index.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        ghosts = set()
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            found = set()
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                # dont ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                    if parent not in ghosts]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
                found.add(key)
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
        if topo_sorted:
            result_keys = topo_sort(graph.items())
        else:
            # iterating the dict yields its keys without needing iterkeys().
            result_keys = graph
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts.

        Like get_ancestry, but parents that are referenced and not present
        are kept in the result as nodes without parents.

        :param versions: An iterable of version ids.
        :return: A topologically sorted list of version ids.
        :raises RevisionNotPresent: if one of versions is not in the index.
        """
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be a index core feature?
        # get a graph of all the mentioned versions:
        graph = {}
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
        while pending:
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            pending = set()
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                # queue parents
                for parent in graph[key]:
                    # dont examine known nodes again
                    if parent in graph:
                        continue
                    pending.add(parent)
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
            if missing_needed:
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version) # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index.

        :return: A list of (version_id, parent_version_ids) pairs, one per
            entry in the index; the parents are always () for a parentless
            index.
        """
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        result = []
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))
        return result

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        if self._parents:
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            all_parents = set()
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            # one extra lookup tells us which referenced parents exist, so
            # that absent (ghost) parents can be left out below.
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                parents = []
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
        else:
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        """Return the number of entries in the underlying graph index."""
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        """Return a tuple of the version ids held in the given 1-tuple keys."""
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        :raises RevisionNotPresent: if version_id is not in the index.
        """
        node = self._get_node(version_id)
        # the first character of the value is the no-eol flag, skip it.
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version.

        :return: 'fulltext' or 'line-delta'.  An index without deltas always
            answers 'fulltext' without looking the version up.
        """
        if not self._deltas:
            return 'fulltext'
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        """Map a compression-parent reference list to a method name."""
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, version_id):
        """Return the (index, key, value, node_refs) entry for version_id.

        :raises RevisionNotPresent: if version_id is not in the index.  This
            keeps get_position, get_method and get_options consistent with
            the other accessors instead of leaking an IndexError.
        """
        for node in self._get_entries(self._version_ids_to_keys([version_id])):
            return node
        raise RevisionNotPresent(version_id, self)

    def get_options(self, version_id):
        """Return the options of a version as a list of strings.

        The first item is the compression method; 'no-eol' is appended when
        the stored value carries the 'N' flag.

        e.g. ['line-delta', 'no-eol']
        """
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
        else:
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts.

        :raises errors.RevisionNotPresent: if version_id is not in the index.
        """
        parents = list(self.iter_parents([version_id]))
        if not parents:
            # missing key
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts.

        :raises RevisionNotPresent: if version_id is not in the index.
        """
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
            check_present=True))
        if not self._parents:
            return ()
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present.

        :raises RevisionNotPresent: naming one of the missing keys.
        """
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
        if missing:
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index.

        Convenience wrapper that hands a single record to add_versions.
        """
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        Versions that are already present with identical details are left
        out of the callback.

        :param versions: a list of tuples:
            (version_id, options, access_memo, parents), where access_memo
            is an (index, pos, size) tuple.
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises KnitCorrupt: for a line-delta in a non-delta index, parents
            in a parentless index, or details that disagree with an entry
            that is already present.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.
        # check for dups

        keys = {}
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        # the first parent is the compression parent.
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                    ": %s %s" % ((value, node_refs), keys[key]))
            del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.items():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.items():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        """Return the set of 1-tuple index keys for the given version ids."""
        return set((version_id, ) for version_id in version_ids)

1763

1764

1765

class _KnitAccess(object):

1766

"""Access to knit records in a .knit file."""

1767

1768

def __init__(self, transport, filename, _file_mode, _dir_mode,

1769

_need_to_create, _create_parent_dir):

1770

"""Create a _KnitAccess for accessing and inserting data.

1771

1772

:param transport: The transport the .knit is located on.

1773

:param filename: The filename of the .knit.

1774

"""

1775

self._transport = transport

1776

self._filename = filename

1777

self._file_mode = _file_mode

1778

self._dir_mode = _dir_mode

1779

self._need_to_create = _need_to_create

1780

self._create_parent_dir = _create_parent_dir

1781

1782

def add_raw_records(self, sizes, raw_data):

1783

"""Add raw knit bytes to a storage area.

1784

1785

The data is spooled to whereever the access method is storing data.

1786

1787

:param sizes: An iterable containing the size of each raw data segment.

1788

:param raw_data: A bytestring containing the data.

1789

:return: A list of memos to retrieve the record later. Each memo is a

1790

tuple - (index, pos, length), where the index field is always None

1791

for the .knit access method.

1792

"""

1793

assert type(raw_data) == str, \

1794

'data must be plain bytes was %s' % type(raw_data)

1795

if not self._need_to_create:

1796

base = self._transport.append_bytes(self._filename, raw_data)

1797

else:

1798

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1799

create_parent_dir=self._create_parent_dir,

1800

mode=self._file_mode,

1801

dir_mode=self._dir_mode)

1802

self._need_to_create = False

1803

base = 0

1804

result = []

1805

for size in sizes:

1806

result.append((None, base, size))

1807

base += size

1808

return result

1809

1810

def create(self):

1811

"""IFF this data access has its own storage area, initialise it.

1812

1813

:return: None.

1814

"""

1815

self._transport.put_bytes_non_atomic(self._filename, '',

1816

mode=self._file_mode)

1817

1818

def open_file(self):

1819

"""IFF this data access can be represented as a single file, open it.

1820

1821

For knits that are not mapped to a single file on disk this will

1822

always return None.

1823

1824

:return: None or a file handle.

1825

"""

1826

try:

1827

return self._transport.get(self._filename)

1828

except NoSuchFile:

1829

pass

1830

return None

1831

1832

def get_raw_records(self, memos_for_retrieval):

1833

"""Get the raw bytes for a records.

1834

1835

:param memos_for_retrieval: An iterable containing the (index, pos,

1836

length) memo for retrieving the bytes. The .knit method ignores

1837

the index as there is always only a single file.

1838

:return: An iterator over the bytes of the records.

1839

"""

1840

read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]

1841

for pos, data in self._transport.readv(self._filename, read_vector):

1842

yield data

1843

1844

1845

class _PackAccess(object):

1846

"""Access to knit records via a collection of packs."""

1847

1848

def __init__(self, index_to_packs, writer=None):

1849

"""Create a _PackAccess object.

1850

1851

:param index_to_packs: A dict mapping index objects to the transport

1852

and file names for obtaining data.

1853

:param writer: A tuple (pack.ContainerWriter, write_index) which

1854

contains the pack to write, and the index that reads from it will

1855

be associated with.

1856

"""

1857

if writer:

1858

self.container_writer = writer[0]

1859

self.write_index = writer[1]

1860

else:

1861

self.container_writer = None

1862

self.write_index = None

1863

self.indices = index_to_packs

1864

1865

def add_raw_records(self, sizes, raw_data):

1866

"""Add raw knit bytes to a storage area.

1867

1868

The data is spooled to the container writer in one bytes-record per

1869

raw data item.

1870

1871

:param sizes: An iterable containing the size of each raw data segment.

1872

:param raw_data: A bytestring containing the data.

1873

:return: A list of memos to retrieve the record later. Each memo is a

1874

tuple - (index, pos, length), where the index field is the

1875

write_index object supplied to the PackAccess object.

1876

"""

1877

assert type(raw_data) == str, \

1878

'data must be plain bytes was %s' % type(raw_data)

1879

result = []

1880

offset = 0

1881

for size in sizes:

1882

p_offset, p_length = self.container_writer.add_bytes_record(

1883

raw_data[offset:offset+size], [])

1884

offset += size

1885

result.append((self.write_index, p_offset, p_length))

1886

return result

1887

1888

def create(self):

1889

"""Pack based knits do not get individually created."""

1890

1891

def get_raw_records(self, memos_for_retrieval):

1892

"""Get the raw bytes for a records.

1893

1894

:param memos_for_retrieval: An iterable containing the (index, pos,

1895

length) memo for retrieving the bytes. The Pack access method

1896

looks up the pack to use for a given record in its index_to_pack

1897

map.

1898

:return: An iterator over the bytes of the records.

1899

"""

1900

# first pass, group into same-index requests

1901

request_lists = []

1902

current_index = None

1903

for (index, offset, length) in memos_for_retrieval:

1904

if current_index == index:

1905

current_list.append((offset, length))

1906

else:

1907

if current_index is not None:

1908

request_lists.append((current_index, current_list))

1909

current_index = index

1910

current_list = [(offset, length)]

1911

# handle the last entry

1912

if current_index is not None:

1913

request_lists.append((current_index, current_list))

1914

for index, offsets in request_lists:

1915

transport, path = self.indices[index]

1916

reader = pack.make_readv_reader(transport, path, offsets)

1917

for names, read_func in reader.iter_records():

1918

yield read_func(None)

1919

1920

def open_file(self):

1921

"""Pack based knits have no single file."""

1922

return None

1923

1924

def set_writer(self, writer, index, (transport, packname)):

1925

"""Set a writer to use for adding data."""

1926

self.indices[index] = (transport, packname)

1927

self.container_writer = writer

1928

self.write_index = index

1929

1930

1931

class _KnitData(object):

1932

"""Manage extraction of data from a KnitAccess, caching and decompressing.

1933

1934

The KnitData class provides the logic for parsing and using knit records,

1935

making use of an access method for the low level read and write operations.

1936

"""

1937

1938

def __init__(self, access):

1939

"""Create a KnitData object.

1940

1941

:param access: The access method to use. Access methods such as

1942

_KnitAccess manage the insertion of raw records and the subsequent

1943

retrieval of the same.

1944

"""

1945

self._access = access

1237

1946

self._checked = False

1238

if create:

1239

self._transport.put(self._filename, StringIO(''), mode=file_mode)

1240

self._records = {}

1947

# TODO: jam 20060713 conceptually, this could spill to disk

1948

# if the cached size gets larger than a certain amount

1949

# but it complicates the model a bit, so for now just use

1950

# a simple dictionary

1951

self._cache = {}

1952

self._do_cache = False

1953

1954

def enable_cache(self):

1955

"""Enable caching of reads."""

1956

self._do_cache = True

1241

1957

1242

1958

def clear_cache(self):

1243

1959

"""Clear the record cache."""

1244

self._records = {}

1960

self._do_cache = False

1961

self._cache = {}

1245

1962

1246

1963

def _open_file(self):

1247

if self._file is None:

1248

try:

1249

self._file = self._transport.get(self._filename)

1250

except NoSuchFile:

1251

pass

1252

return self._file

1964

return self._access.open_file()

1253

1965

1254

1966

def _record_to_data(self, version_id, digest, lines):

1255

1967

"""Convert version_id, digest, lines into a raw data block.

1257

1969

:return: (len, a StringIO instance with the raw data ready to read.)

1258

1970

"""

1259

1971

sio = StringIO()

1260

data_file = GzipFile(None, mode='wb', fileobj=sio)

1972

data_file = GzipFile(None, mode='wb', fileobj=sio,

1973

compresslevel=Z_DEFAULT_COMPRESSION)

1974

1975

assert isinstance(version_id, str)

1261

1976

data_file.writelines(chain(

1262

["version %s %d %s\n" % (version_id.encode('utf-8'),

1977

["version %s %d %s\n" % (version_id,

1263

1978

len(lines),

1264

1979

digest)],

1265

1980

lines,

1266

["end %s\n" % version_id.encode('utf-8')]))

1981

["end %s\n" % version_id]))

1267

1982

data_file.close()

1268

1983

length= sio.tell()

1269

1984

1270

1985

sio.seek(0)

1271

1986

return length, sio

1272

1987

1273

def add_raw_record(self, raw_data):

1988

def add_raw_records(self, sizes, raw_data):

1274

1989

"""Append a prepared record to the data file.

1275

1990

1276

:return: the offset in the data file raw_data was written.

1991

:param sizes: An iterable containing the size of each raw data segment.

1992

:param raw_data: A bytestring containing the data.

1993

:return: a list of index data for the way the data was stored.

1994

See the access method add_raw_records documentation for more

1995

details.

1277

1996

"""

1278

assert isinstance(raw_data, str), 'data must be plain bytes'

1279

return self._transport.append(self._filename, StringIO(raw_data))

1997

return self._access.add_raw_records(sizes, raw_data)

1280

1998

1281

1999

def add_record(self, version_id, digest, lines):

1282

"""Write new text record to disk. Returns the position in the

1283

file where it was written."""

2000

"""Write new text record to disk.

2001

2002

Returns index data for retrieving it later, as per add_raw_records.

2003

"""

1284

2004

size, sio = self._record_to_data(version_id, digest, lines)

1285

# cache

1286

self._records[version_id] = (digest, lines)

1287

# write to disk

1288

start_pos = self._transport.append(self._filename, sio)

1289

return start_pos, size

2005

result = self.add_raw_records([size], sio.getvalue())

2006

if self._do_cache:

2007

self._cache[version_id] = sio.getvalue()

2008

return result[0]

1290

2009

1291

2010

def _parse_record_header(self, version_id, raw_data):

1292

2011

"""Parse a record header for consistency.

1295

2014

as (stream, header_record)

1296

2015

"""

1297

2016

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1298

rec = df.readline().split()

2017

try:

2018

rec = self._check_header(version_id, df.readline())

2019

except Exception, e:

2020

raise KnitCorrupt(self._access,

2021

"While reading {%s} got %s(%s)"

2022

% (version_id, e.__class__.__name__, str(e)))

2023

return df, rec

2024

2025

def _check_header(self, version_id, line):

2026

rec = line.split()

1299

2027

if len(rec) != 4:

1300

raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')

1301

if rec[1].decode('utf-8')!= version_id:

1302

raise KnitCorrupt(self._filename,

1303

'unexpected version, wanted %r, got %r' % (

1304

version_id, rec[1]))

1305

return df, rec

2028

raise KnitCorrupt(self._access,

2029

'unexpected number of elements in record header')

2030

if rec[1] != version_id:

2031

raise KnitCorrupt(self._access,

2032

'unexpected version, wanted %r, got %r'

2033

% (version_id, rec[1]))

2034

return rec

1306

2035

1307

2036

def _parse_record(self, version_id, data):

1308

2037

# profiling notes:

1309

2038

# 4168 calls in 2880 217 internal

1310

2039

# 4168 calls to _parse_record_header in 2121

1311

2040

# 4168 calls to readlines in 330

1312

df, rec = self._parse_record_header(version_id, data)

1313

record_contents = df.readlines()

1314

l = record_contents.pop()

1315

assert len(record_contents) == int(rec[2])

1316

if l.decode('utf-8') != 'end %s\n' % version_id:

1317

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

1318

% (l, version_id))

2041

df = GzipFile(mode='rb', fileobj=StringIO(data))

2042

2043

try:

2044

record_contents = df.readlines()

2045

except Exception, e:

2046

raise KnitCorrupt(self._access,

2047

"While reading {%s} got %s(%s)"

2048

% (version_id, e.__class__.__name__, str(e)))

2049

header = record_contents.pop(0)

2050

rec = self._check_header(version_id, header)

2051

2052

last_line = record_contents.pop()

2053

if len(record_contents) != int(rec[2]):

2054

raise KnitCorrupt(self._access,

2055

'incorrect number of lines %s != %s'

2056

' for version {%s}'

2057

% (len(record_contents), int(rec[2]),

2058

version_id))

2059

if last_line != 'end %s\n' % rec[1]:

2060

raise KnitCorrupt(self._access,

2061

'unexpected version end line %r, wanted %r'

2062

% (last_line, version_id))

1319

2063

df.close()

1320

2064

return record_contents, rec[3]

1321

2065

1324

2068

1325

2069

This unpacks enough of the text record to validate the id is

1326

2070

as expected but thats all.

1327

1328

It will actively recompress currently cached records on the

1329

basis that that is cheaper than I/O activity.

1330

2071

"""

1331

needed_records = []

1332

for version_id, pos, size in records:

1333

if version_id not in self._records:

1334

needed_records.append((version_id, pos, size))

1335

1336

2072

# setup an iterator of the external records:

1337

2073

# uses readv so nice and fast we hope.

1338

if len(needed_records):

2074

if len(records):

1339

2075

# grab the disk data needed.

1340

raw_records = self._transport.readv(self._filename,

1341

[(pos, size) for version_id, pos, size in needed_records])

1342

1343

for version_id, pos, size in records:

1344

if version_id in self._records:

1345

# compress a new version

1346

size, sio = self._record_to_data(version_id,

1347

self._records[version_id][0],

1348

self._records[version_id][1])

1349

yield version_id, sio.getvalue()

1350

else:

1351

pos, data = raw_records.next()

2076

if self._cache:

2077

# Don't check _cache if it is empty

2078

needed_offsets = [index_memo for version_id, index_memo

2079

in records

2080

if version_id not in self._cache]

2081

else:

2082

needed_offsets = [index_memo for version_id, index_memo

2083

in records]

2084

2085

raw_records = self._access.get_raw_records(needed_offsets)

2086

2087

for version_id, index_memo in records:

2088

if version_id in self._cache:

2089

# This data has already been validated

2090

data = self._cache[version_id]

2091

else:

2092

data = raw_records.next()

2093

if self._do_cache:

2094

self._cache[version_id] = data

2095

1352

2096

# validate the header

1353

2097

df, rec = self._parse_record_header(version_id, data)

1354

2098

df.close()

1355

yield version_id, data

1356

2099

yield version_id, data

1357

2100

1358

2101

def read_records_iter(self, records):

1359

2102

"""Read text records from data file and yield result.

1360

2103

1361

Each passed record is a tuple of (version_id, pos, len) and

1362

will be read in the given order. Yields (version_id,

1363

contents, digest).

2104

The result will be returned in whatever is the fastest to read.

2105

Not by the order requested. Also, multiple requests for the same

2106

record will only yield 1 response.

2107

:param records: A list of (version_id, pos, len) entries

2108

:return: Yields (version_id, contents, digest) in the order

2109

read, not the order requested

1364

2110

"""

1365

# profiling notes:

1366

# 60890 calls for 4168 extractions in 5045, 683 internal.

1367

# 4168 calls to readv in 1411

1368

# 4168 calls to parse_record in 2880

1369

1370

needed_records = []

1371

for version_id, pos, size in records:

1372

if version_id not in self._records:

1373

needed_records.append((version_id, pos, size))

1374

1375

if len(needed_records):

1376

# We take it that the transport optimizes the fetching as good

1377

# as possible (ie, reads continous ranges.)

1378

response = self._transport.readv(self._filename,

1379

[(pos, size) for version_id, pos, size in needed_records])

1380

1381

for (record_id, pos, size), (pos, data) in izip(iter(needed_records), response):

1382

content, digest = self._parse_record(record_id, data)

1383

self._records[record_id] = (digest, content)

1384

1385

for version_id, pos, size in records:

1386

yield version_id, list(self._records[version_id][1]), self._records[version_id][0]

2111

if not records:

2112

return

2113

2114

if self._cache:

2115

# Skip records we have alread seen

2116

yielded_records = set()

2117

needed_records = set()

2118

for record in records:

2119

if record[0] in self._cache:

2120

if record[0] in yielded_records:

2121

continue

2122

yielded_records.add(record[0])

2123

data = self._cache[record[0]]

2124

content, digest = self._parse_record(record[0], data)

2125

yield (record[0], content, digest)

2126

else:

2127

needed_records.add(record)

2128

needed_records = sorted(needed_records, key=operator.itemgetter(1))

2129

else:

2130

needed_records = sorted(set(records), key=operator.itemgetter(1))

2131

2132

if not needed_records:

2133

return

2134

2135

# The transport optimizes the fetching as well

2136

# (ie, reads continuous ranges.)

2137

raw_data = self._access.get_raw_records(

2138

[index_memo for version_id, index_memo in needed_records])

2139

2140

for (version_id, index_memo), data in \

2141

izip(iter(needed_records), raw_data):

2142

content, digest = self._parse_record(version_id, data)

2143

if self._do_cache:

2144

self._cache[version_id] = data

2145

yield version_id, content, digest

1387

2146

1388

2147

def read_records(self, records):

1389

2148

"""Read records into a dictionary."""

1390

2149

components = {}

1391

for record_id, content, digest in self.read_records_iter(records):

2150

for record_id, content, digest in \

2151

self.read_records_iter(records):

1392

2152

components[record_id] = (content, digest)

1393

2153

return components

1394

2154

1418

2178

if not version_ids:

1419

2179

return 0

1420

2180

1421

pb = bzrlib.ui.ui_factory.nested_progress_bar()

2181

pb = ui.ui_factory.nested_progress_bar()

1422

2182

try:

1423

2183

version_ids = list(version_ids)

1424

2184

if None in version_ids:

1426

2186

1427

2187

self.source_ancestry = set(self.source.get_ancestry(version_ids))

1428

2188

this_versions = set(self.target._index.get_versions())

2189

# XXX: For efficiency we should not look at the whole index,

2190

# we only need to consider the referenced revisions - they

2191

# must all be present, or the method must be full-text.

2192

# TODO, RBC 20070919

1429

2193

needed_versions = self.source_ancestry - this_versions

1430

cross_check_versions = self.source_ancestry.intersection(this_versions)

1431

mismatched_versions = set()

1432

for version in cross_check_versions:

1433

# scan to include needed parents.

1434

n1 = set(self.target.get_parents_with_ghosts(version))

1435

n2 = set(self.source.get_parents_with_ghosts(version))

1436

if n1 != n2:

1437

# FIXME TEST this check for cycles being introduced works

1438

# the logic is we have a cycle if in our graph we are an

1439

# ancestor of any of the n2 revisions.

1440

for parent in n2:

1441

if parent in n1:

1442

# safe

1443

continue

1444

else:

1445

parent_ancestors = self.source.get_ancestry(parent)

1446

if version in parent_ancestors:

1447

raise errors.GraphCycleError([parent, version])

1448

# ensure this parent will be available later.

1449

new_parents = n2.difference(n1)

1450

needed_versions.update(new_parents.difference(this_versions))

1451

mismatched_versions.add(version)

1452

2194

1453

if not needed_versions and not mismatched_versions:

2195

if not needed_versions:

1454

2196

return 0

1455

2197

full_list = topo_sort(self.source.get_graph())

1456

2198

1469

2211

# if source has the parent, we must:

1470

2212

# * already have it or

1471

2213

# * have it scheduled already

1472

# otherwise we dont care

2214

# otherwise we don't care

1473

2215

assert (self.target.has_version(parent) or

1474

2216

parent in copy_set or

1475

2217

not self.source.has_version(parent))

1476

data_pos, data_size = self.source._index.get_position(version_id)

1477

copy_queue_records.append((version_id, data_pos, data_size))

2218

index_memo = self.source._index.get_position(version_id)

2219

copy_queue_records.append((version_id, index_memo))

1478

2220

copy_queue.append((version_id, options, parents))

1479

2221

copy_set.add(version_id)

1480

2222

1493

2235

raw_records.append((version_id, options, parents, len(raw_data)))

1494

2236

raw_datum.append(raw_data)

1495

2237

self.target._add_raw_records(raw_records, ''.join(raw_datum))

1496

1497

for version in mismatched_versions:

1498

# FIXME RBC 20060309 is this needed?

1499

n1 = set(self.target.get_parents_with_ghosts(version))

1500

n2 = set(self.source.get_parents_with_ghosts(version))

1501

# write a combined record to our history preserving the current

1502

# parents as first in the list

1503

new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))

1504

self.target.fix_parents(version, new_parents)

1505

2238

return count

1506

2239

finally:

1507

2240

pb.finished()

1535

2268

if not version_ids:

1536

2269

return 0

1537

2270

1538

pb = bzrlib.ui.ui_factory.nested_progress_bar()

2271

pb = ui.ui_factory.nested_progress_bar()

1539

2272

try:

1540

2273

version_ids = list(version_ids)

1541

2274

1542

2275

self.source_ancestry = set(self.source.get_ancestry(version_ids))

1543

2276

this_versions = set(self.target._index.get_versions())

1544

2277

needed_versions = self.source_ancestry - this_versions

1545

cross_check_versions = self.source_ancestry.intersection(this_versions)

1546

mismatched_versions = set()

1547

for version in cross_check_versions:

1548

# scan to include needed parents.

1549

n1 = set(self.target.get_parents_with_ghosts(version))

1550

n2 = set(self.source.get_parents(version))

1551

# if all of n2's parents are in n1, then its fine.

1552

if n2.difference(n1):

1553

# FIXME TEST this check for cycles being introduced works

1554

# the logic is we have a cycle if in our graph we are an

1555

# ancestor of any of the n2 revisions.

1556

for parent in n2:

1557

if parent in n1:

1558

# safe

1559

continue

1560

else:

1561

parent_ancestors = self.source.get_ancestry(parent)

1562

if version in parent_ancestors:

1563

raise errors.GraphCycleError([parent, version])

1564

# ensure this parent will be available later.

1565

new_parents = n2.difference(n1)

1566

needed_versions.update(new_parents.difference(this_versions))

1567

mismatched_versions.add(version)

1568

2278

1569

if not needed_versions and not mismatched_versions:

2279

if not needed_versions:

1570

2280

return 0

1571

2281

full_list = topo_sort(self.source.get_graph())

1572

2282

1586

2296

self.target.add_lines(

1587

2297

version_id, parents, self.source.get_lines(version_id))

1588

2298

count = count + 1

1589

1590

for version in mismatched_versions:

1591

# FIXME RBC 20060309 is this needed?

1592

n1 = set(self.target.get_parents_with_ghosts(version))

1593

n2 = set(self.source.get_parents(version))

1594

# write a combined record to our history preserving the current

1595

# parents as first in the list

1596

new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))

1597

self.target.fix_parents(version, new_parents)

1598

2299

return count

1599

2300

finally:

1600

2301

pb.finished()

1603

2304

InterVersionedFile.register_optimiser(WeaveToKnit)

1604

2305

1605

2306

1606

class SequenceMatcher(difflib.SequenceMatcher):

1607

"""Knit tuned sequence matcher.

1608

1609

This is based on profiling of difflib which indicated some improvements

1610

for our usage pattern.

2307

# Deprecated, use PatienceSequenceMatcher instead

2308

KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher

2309

2310

2311

def annotate_knit(knit, revision_id):

2312

"""Annotate a knit with no cached annotations.

2313

2314

This implementation is for knits with no cached annotations.

2315

It will work for knits with cached annotations, but this is not

2316

recommended.

1611

2317

"""

1612

1613

def find_longest_match(self, alo, ahi, blo, bhi):

1614

"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

1615

1616

If isjunk is not defined:

1617

1618

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where

1619

alo <= i <= i+k <= ahi

1620

blo <= j <= j+k <= bhi

1621

and for all (i',j',k') meeting those conditions,

1622

k >= k'

1623

i <= i'

1624

and if i == i', j <= j'

1625

1626

In other words, of all maximal matching blocks, return one that

1627

starts earliest in a, and of all those maximal matching blocks that

1628

start earliest in a, return the one that starts earliest in b.

1629

1630

>>> s = SequenceMatcher(None, " abcd", "abcd abcd")

1631

>>> s.find_longest_match(0, 5, 0, 9)

1632

(0, 4, 5)

1633

1634

If isjunk is defined, first the longest matching block is

1635

determined as above, but with the additional restriction that no

1636

junk element appears in the block. Then that block is extended as

1637

far as possible by matching (only) junk elements on both sides. So

1638

the resulting block never matches on junk except as identical junk

1639

happens to be adjacent to an "interesting" match.

1640

1641

Here's the same example as before, but considering blanks to be

1642

junk. That prevents " abcd" from matching the " abcd" at the tail

1643

end of the second sequence directly. Instead only the "abcd" can

1644

match, and matches the leftmost "abcd" in the second sequence:

1645

1646

>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")

1647

>>> s.find_longest_match(0, 5, 0, 9)

1648

(1, 0, 4)

1649

1650

If no blocks match, return (alo, blo, 0).

1651

1652

>>> s = SequenceMatcher(None, "ab", "c")

1653

>>> s.find_longest_match(0, 2, 0, 1)

1654

(0, 0, 0)

1655

"""

1656

1657

# CAUTION: stripping common prefix or suffix would be incorrect.

1658

# E.g.,

1659

# ab

1660

# acab

1661

# Longest matching block is "ab", but if common prefix is

1662

# stripped, it's "a" (tied with "b"). UNIX(tm) diff does so

1663

# strip, so ends up claiming that ab is changed to acab by

1664

# inserting "ca" in the middle. That's minimal but unintuitive:

1665

# "it's obvious" that someone inserted "ac" at the front.

1666

# Windiff ends up at the same place as diff, but by pairing up

1667

# the unique 'b's and then matching the first two 'a's.

1668

1669

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk

1670

besti, bestj, bestsize = alo, blo, 0

1671

# find longest junk-free match

1672

# during an iteration of the loop, j2len[j] = length of longest

1673

# junk-free match ending with a[i-1] and b[j]

1674

j2len = {}

1675

# nothing = []

1676

b2jget = b2j.get

1677

for i in xrange(alo, ahi):

1678

# look at all instances of a[i] in b; note that because

1679

# b2j has no junk keys, the loop is skipped if a[i] is junk

1680

j2lenget = j2len.get

1681

newj2len = {}

1682

1683

# changing b2j.get(a[i], nothing) to a try:Keyerror pair produced the

1684

# following improvement

1685

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1686

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1687

# +76519 0 374.6700 374.6700 +<method 'has_key' of 'dict' objects>

1688

# to

1689

# 704 0 3733.2820 2209.6520 bzrlib.knit:1336(find_longest_match)

1690

# +211400 0 1147.3520 1147.3520 +<method 'get' of 'dict' objects>

1691

# +76519 0 376.2780 376.2780 +<method 'has_key' of 'dict' objects>

1692

1693

try:

1694

js = b2j[a[i]]

1695

except KeyError:

1696

pass

1697

else:

1698

for j in js:

1699

# a[i] matches b[j]

1700

if j >= blo:

1701

if j >= bhi:

1702

break

1703

k = newj2len[j] = 1 + j2lenget(-1 + j, 0)

1704

if k > bestsize:

1705

besti, bestj, bestsize = 1 + i-k, 1 + j-k, k

1706

j2len = newj2len

1707

1708

# Extend the best by non-junk elements on each end. In particular,

1709

# "popular" non-junk elements aren't in b2j, which greatly speeds

1710

# the inner loop above, but also means "the best" match so far

1711

# doesn't contain any junk *or* popular non-junk elements.

1712

while besti > alo and bestj > blo and \

1713

not isbjunk(b[bestj-1]) and \

1714

a[besti-1] == b[bestj-1]:

1715

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1716

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1717

not isbjunk(b[bestj+bestsize]) and \

1718

a[besti+bestsize] == b[bestj+bestsize]:

1719

bestsize += 1

1720

1721

# Now that we have a wholly interesting match (albeit possibly

1722

# empty!), we may as well suck up the matching junk on each

1723

# side of it too. Can't think of a good reason not to, and it

1724

# saves post-processing the (possibly considerable) expense of

1725

# figuring out what to do with it. In the case of an empty

1726

# interesting match, this is clearly the right thing to do,

1727

# because no other kind of match is possible in the regions.

1728

while besti > alo and bestj > blo and \

1729

isbjunk(b[bestj-1]) and \

1730

a[besti-1] == b[bestj-1]:

1731

besti, bestj, bestsize = besti-1, bestj-1, bestsize+1

1732

while besti+bestsize < ahi and bestj+bestsize < bhi and \

1733

isbjunk(b[bestj+bestsize]) and \

1734

a[besti+bestsize] == b[bestj+bestsize]:

1735

bestsize = bestsize + 1

1736

1737

return besti, bestj, bestsize

1738

2318

ancestry = knit.get_ancestry(revision_id)

2319

fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))

2320

annotations = {}

2321

for candidate in ancestry:

2322

if candidate in annotations:

2323

continue

2324

parents = knit.get_parents(candidate)

2325

if len(parents) == 0:

2326

blocks = None

2327

elif knit._index.get_method(candidate) != 'line-delta':

2328

blocks = None

2329

else:

2330

parent, sha1, noeol, delta = knit.get_delta(candidate)

2331

blocks = KnitContent.get_line_delta_blocks(delta,

2332

fulltext[parents[0]], fulltext[candidate])

2333

annotations[candidate] = list(annotate.reannotate([annotations[p]

2334

for p in parents], fulltext[candidate], candidate, blocks))

2335

return iter(annotations[revision_id])

2336

2337

2338

try:

2339

from bzrlib._knit_load_data_c import _load_data_c as _load_data

2340

except ImportError:

2341

from bzrlib._knit_load_data_py import _load_data_py as _load_data

Older »