~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2005-06-22 06:37:43 UTC
Revision ID: mbp@sourcefrog.net-20050622063743-e395f04c4db8977f

- move old blackbox code from testbzr into bzrlib.selftest.blackbox

files added:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/statcache.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/HttpServer.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/emacs

contrib/emacs/bzr-mode.el

doc/README.1st

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/developers

doc/developers/HACKING

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/bundle-creation.txt

doc/developers/bundles.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/dirstate.txt

doc/developers/gc.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/initial-push-pull.txt

doc/developers/merge-scaling.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/developers/revert.txt

doc/developers/scratch.txt

doc/developers/status.txt

doc/developers/uncommit.txt

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/shared_repository_layouts.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

doc/version_info.txt

generate_docs.py

man1

profile_imports.py

tools

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/elementtree/ => elementtree/

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/help.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh/_bzr

elementtree/ElementTree.py

setup.py *

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only

updates.

Knit file layout:

lifeless: the data file is made up of "delta records". each delta record has a delta header

that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of

the -expanded data- (ie, the delta applied to the parent). the delta also ends with an

end-marker; simply "end VERSION"

delta can be line or full contents.

... the 8's there are the index number of the annotation.

version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e

59,59,3

8 if ie.executable:

8 e.set('executable', 'yes')

130,130,2

8 if elt.get('executable') == 'yes':

8 ie.executable = True

end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad

whats in an index:

09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents

09:33 < jrydberg> lifeless: the parents are currently dictionary compressed

09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)

09:33 < lifeless> right

09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used

in the deltas to provide line annotation

"""

# TODOS:

# 10:16 < lifeless> make partial index writes safe

# 10:16 < lifeless> implement 'knit.check()' like weave.check()

# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave

# always' approach.

# move sha1 out of the content so that join is faster at verifying parents

# record content length ?

from copy import copy

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_strings,

)

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

100

import bzrlib.ui

101

import bzrlib.weave

102

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

103

104

105

# TODO: Split out code specific to this format into an associated object.

106

107

# TODO: Can we put in some kind of value to check that the index and data

108

# files belong together?

109

110

# TODO: accommodate binaries, perhaps by storing a byte count

111

112

# TODO: function to check whole file

113

114

# TODO: atomically append data, then measure backwards from the cursor

115

# position after writing to work out where it was located. we may need to

116

# bypass python file buffering.

117

118

# Suffix for the knit data file (the file of delta records described in the
# module docstring) -- presumably appended to a base name; TODO confirm usage.
DATA_SUFFIX = '.knit'

119

# Suffix for the knit index file (see "whats in an index" in the module
# docstring) -- presumably appended to a base name; TODO confirm usage.
INDEX_SUFFIX = '.kndx'

120

121

122

class KnitContent(object):
    """Annotated lines of one knit version, the thing deltas are applied to.

    The lines are held in ``_lines`` as (origin, text) pairs.
    """

    def __init__(self, lines):
        # Kept by reference, not copied; copy() gives an independent list.
        self._lines = lines

    def annotate_iter(self):
        """Return an iterator over the (origin, text) pair of every line."""
        return iter(self._lines)

    def annotate(self):
        """Return the (origin, text) pairs as a freshly built list."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Yield the hunks that turn this content into new_lines.

        new_lines is another content object (its text() and _lines are
        used). Each hunk is (old_start, old_end, new_count, new_pairs)
        where new_pairs is the slice of new_lines._lines replacing the
        old range. Ranges the matcher reports as 'equal' produce nothing.
        """
        target_texts = new_lines.text()
        source_texts = self.text()
        matcher = KnitSequenceMatcher(None, source_texts, target_texts)
        for opcode in matcher.get_opcodes():
            tag, old_start, old_end, new_start, new_end = opcode
            if tag != 'equal':
                replacement = new_lines._lines[new_start:new_end]
                yield old_start, old_end, new_end - new_start, replacement

    def line_delta(self, new_lines):
        """Return the hunks from line_delta_iter(new_lines) as a list."""
        hunks = self.line_delta_iter(new_lines)
        return list(hunks)

    def text(self):
        """Return only the text of every line, dropping the origins."""
        return [line_text for _origin, line_text in self._lines]

    def copy(self):
        """Return a new KnitContent over a shallow copy of the line list."""
        duplicated_lines = self._lines[:]
        return KnitContent(duplicated_lines)

155

156

157

class _KnitFactory(object):
    """Common base of the factories that build content objects."""

    def make(self, lines, version_id):
        """Return a KnitContent in which every line has origin version_id.

        lines must support len(); each entry is paired with version_id.
        """
        origins = [version_id] * len(lines)
        return KnitContent(zip(origins, lines))

163

164

165

class KnitAnnotateFactory(_KnitFactory):

166

"""Factory for creating annotated Content objects."""

167

168

annotated = True

169

170

def parse_fulltext(self, content, version_id):

171

"""Convert fulltext to internal representation

172

173

fulltext content is of the format

174

revid(utf8) plaintext\n

175

internal representation is of the format:

176

(revid, plaintext)

177

"""

178

# TODO: jam 20070209 The tests expect this to be returned as tuples,

179

# but the code itself doesn't really depend on that.

180

# Figure out a way to not require the overhead of turning the

181

# list back into tuples.

182

lines = [tuple(line.split(' ', 1)) for line in content]

183

return KnitContent(lines)

184

185

def parse_line_delta_iter(self, lines):

186

return iter(self.parse_line_delta(lines))

187

188

def parse_line_delta(self, lines, version_id):

189

"""Convert a line based delta into internal representation.

190

191

line delta is in the form of:

192

intstart intend intcount

193

1..count lines:

194

revid(utf8) newline\n

195

internal representation is

196

(start, end, count, [1..count tuples (revid, newline)])

197

"""

198

result = []

199

lines = iter(lines)

200

next = lines.next

201

202

cache = {}

203

def cache_and_return(line):

204

origin, text = line.split(' ', 1)

205

return cache.setdefault(origin, origin), text

206

207

# walk through the lines parsing.

208

for header in lines:

209

start, end, count = [int(n) for n in header.split(',')]

210

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

211

result.append((start, end, count, contents))

212

return result

213

214

def get_fulltext_content(self, lines):

215

"""Extract just the content lines from a fulltext."""

216

return (line.split(' ', 1)[1] for line in lines)

217

218

def get_linedelta_content(self, lines):

219

"""Extract just the content from a line delta.

220

221

This doesn't return all of the extra information stored in a delta.

222

Only the actual content lines.

223

"""

224

lines = iter(lines)

225

next = lines.next

226

for header in lines:

227

header = header.split(',')

228

count = int(header[2])

229

for i in xrange(count):

230

origin, text = next().split(' ', 1)

231

yield text

232

233

def lower_fulltext(self, content):

234

"""convert a fulltext content record into a serializable form.

235

236

see parse_fulltext which this inverts.

237

"""

238

# TODO: jam 20070209 We only do the caching thing to make sure that

239

# the origin is a valid utf-8 line, eventually we could remove it

240

return ['%s %s' % (o, t) for o, t in content._lines]

241

242

def lower_line_delta(self, delta):
    """Serialize an annotated delta to a list of lines.

    Every hunk contributes a 'start,end,count' header line followed by
    one 'origin text' line per content tuple.
    See parse_line_delta which this inverts.
    """
    # TODO: jam 20070209 We only do the caching thing to make sure that
    #       the origin is a valid utf-8 line, eventually we could remove it
    serialized = []
    for start, end, count, annotated in delta:
        serialized.append('%d,%d,%d\n' % (start, end, count))
        for origin, text in annotated:
            serialized.append(origin + ' ' + text)
    return serialized

255

256

257

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain (unannotated) Content objects."""

    annotated = False

    def parse_fulltext(self, content, version_id):
        """Build a content object from an unannotated fulltext.

        This is not a no-op: the internal representation pairs every
        line with a version id, here the constant version_id.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        """Yield (start, end, count, contents) for each hunk in lines.

        lines must support len() and slicing.  contents pairs version_id
        with each of the hunk's content lines.
        """
        total = len(lines)
        position = 0
        while position < total:
            fields = lines[position].split(',')
            start, end, count = [int(field) for field in fields]
            first = position + 1
            last = first + count
            yield start, end, count, zip([version_id] * count,
                                         lines[first:last])
            position = last

    def parse_line_delta(self, lines, version_id):
        """Return the hunks of parse_line_delta_iter as a list."""
        hunks = self.parse_line_delta_iter(lines, version_id)
        return list(hunks)

    def get_fulltext_content(self, lines):
        """Return an iterator over the lines of a fulltext, unchanged."""
        return iter(lines)

    def get_linedelta_content(self, lines):
        """Yield only the content lines of a line delta.

        The 'start,end,count' header lines are consumed but not reported.
        """
        # Content lines still expected for the hunk currently being read.
        pending = 0
        for line in lines:
            if pending > 0:
                pending -= 1
                yield line
            else:
                fields = line.split(',')
                pending = int(fields[2])

    def lower_fulltext(self, content):
        """Serialize a content object as its plain text lines."""
        return content.text()

    def lower_line_delta(self, delta):
        """Serialize a delta, dropping the origin of every content line."""
        serialized = []
        for start, end, count, annotated in delta:
            serialized.append('%d,%d,%d\n' % (start, end, count))
            for origin, text in annotated:
                serialized.append(text)
        return serialized

310

311

312

def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location.

    :param transport: The transport the knit files are created on.
    :param relpath: Path of the knit relative to transport.
    """
    # KnitVersionedFile.__init__ takes (relpath, transport, file_mode,
    # access_mode, factory, ...).  The old positional call swapped the first
    # two arguments, passed 'w' as file_mode and handed the factory *class*
    # over as access_mode, which trips the access_mode assertion.
    knit = KnitVersionedFile(relpath, transport, access_mode='w',
                             factory=KnitPlainFactory(), create=True)
    # Opening the data file confirms it exists; close the handle at once
    # instead of leaking it.
    knit._data._open_file().close()

316

317

318

class KnitVersionedFile(VersionedFile):

319

"""Weave-like structure with faster random access.

320

321

A knit stores a number of texts and a summary of the relationships

322

between them. Texts are identified by a string version-id. Texts

323

are normally stored and retrieved as a series of lines, but can

324

also be passed as single strings.

325

326

Lines are stored with the trailing newline (if any) included, to

327

avoid special cases for files with no final newline. Lines are

328

composed of 8-bit characters, not unicode. The combination of

329

these approaches should mean any 'binary' file can be safely

330

stored and retrieved.

331

"""

332

333

def __init__(self, relpath, transport, file_mode=None, access_mode=None,
             factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
             create=False, create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Construct a knit at location specified by relpath.

    :param relpath: Base name of the knit; INDEX_SUFFIX and DATA_SUFFIX
        are appended to it for the two component files.
    :param transport: Transport used to reach the component files.
    :param file_mode: Passed through to the index and data components.
    :param access_mode: 'r' or 'w'; defaults to 'w' when None.
    :param factory: Content factory; a KnitAnnotateFactory is created
        when none is given.
    :param basis_knit: Deprecated; passing it only emits a warning.
    :param delta: Stored as self.delta (whether added texts may be stored
        as line deltas).
    :param create: If not True, only open an existing knit.
    :param create_parent_dir: If True, create the parent directory if
        creating the file fails. (This is used for stores with
        hash-prefixes that may not exist yet)
    :param delay_create: The calling code is aware that the knit won't
        actually be created until the first data is stored.
    :param dir_mode: Passed through to the index and data components.
    """
    if deprecated_passed(basis_knit):
        # The message used to read "KnitVersionedFile.__()".
        warnings.warn("KnitVersionedFile.__init__(): The basis_knit"
                      " parameter is deprecated as of bzr 0.9.",
                      DeprecationWarning, stacklevel=2)
    if access_mode is None:
        access_mode = 'w'
    super(KnitVersionedFile, self).__init__(access_mode)
    assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
    self.transport = transport
    self.filename = relpath
    self.factory = factory or KnitAnnotateFactory()
    self.writable = (access_mode == 'w')
    self.delta = delta

    self._max_delta_chain = 200

    # The index must exist before the data component is built: len(self)
    # below is answered by the index.
    self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)
    # Only ask for a new data file when the index holds no versions.
    self._data = _KnitData(transport, relpath + DATA_SUFFIX,
        access_mode, create=create and not len(self), file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode)

370

371

def __repr__(self):

372

return '%s(%s)' % (self.__class__.__name__,

373

self.transport.abspath(self.filename))

374

375

def _check_should_delta(self, first_parents):

376

"""Iterate back through the parent listing, looking for a fulltext.

377

378

This is used when we want to decide whether to add a delta or a new

379

fulltext. It searches for _max_delta_chain parents. When it finds a

380

fulltext parent, it sees if the total size of the deltas leading up to

381

it is large enough to indicate that we want a new full text anyway.

382

383

Return True if we should create a new delta, False if we should use a

384

full text.

385

"""

386

delta_size = 0

387

fulltext_size = None

388

delta_parents = first_parents

389

for count in xrange(self._max_delta_chain):

390

parent = delta_parents[0]

391

method = self._index.get_method(parent)

392

pos, size = self._index.get_position(parent)

393

if method == 'fulltext':

394

fulltext_size = size

395

break

396

delta_size += size

397

delta_parents = self._index.get_parents(parent)

398

else:

399

# We couldn't find a fulltext, so we must create a new one

400

return False

401

402

return fulltext_size > delta_size

403

404

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
    """See VersionedFile._add_delta().

    The delta is stored as a 'line-delta' record when delta_parent is
    given and _check_should_delta() approves; in every other case the
    work is handed to the base class implementation.
    """
    self._check_add(version_id, []) # should we check the lines ?
    self._check_versions_present(parents)

    if delta_parent is None:
        # reconstitute as full text.
        assert len(delta) == 1 or len(delta) == 0
        if len(delta):
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    # To speed the extract of texts the delta chain is limited
    # to a fixed number of deltas.  This should minimize both
    # I/O and the time spend applying deltas.
    # The window was changed to a maximum of 200 deltas, but also added
    # was a check that the total compressed size of the deltas is
    # smaller than the compressed size of the fulltext.
    if not self._check_should_delta([delta_parent]):
        # We don't want a delta here, just do a normal insertion.
        return super(KnitVersionedFile, self)._add_delta(version_id,
                                                         parents,
                                                         delta_parent,
                                                         sha1,
                                                         noeol,
                                                         delta)

    options = []
    if noeol:
        options.append('no-eol')
    options.append('line-delta')
    store_lines = self.factory.lower_line_delta(delta)

    where, size = self._data.add_record(version_id, sha1, store_lines)
    self._index.add_version(version_id, options, where, size, parents)

458

459

def _add_raw_records(self, records, data):

460

"""Add all the records 'records' with data pre-joined in 'data'.

461

462

:param records: A list of tuples(version_id, options, parents, size).

463

:param data: The data for the records. When it is written, the records

464

are adjusted to have pos pointing into data by the sum of

465

the preceding records sizes.

466

"""

467

# write all the data

468

pos = self._data.add_raw_record(data)

469

offset = 0

470

index_entries = []

471

for (version_id, options, parents, size) in records:

472

index_entries.append((version_id, options, pos+offset,

473

size, parents))

474

if self._data._do_cache:

475

self._data._cache[version_id] = data[offset:offset+size]

476

offset += size

477

self._index.add_versions(index_entries)

478

479

def enable_cache(self):
    """Turn on caching in this knit's data component."""
    data = self._data
    data.enable_cache()

482

483

def clear_cache(self):
    """Clear the data component's cache; the index is left alone."""
    data = self._data
    data.clear_cache()

486

487

def copy_to(self, name, transport):
    """See VersionedFile.copy_to()."""
    index_target = name + INDEX_SUFFIX
    staging = index_target + '.tmp'
    # copy the current index to a temp index to avoid racing with local
    # writes
    transport.put_file_non_atomic(staging,
                                  self.transport.get(self._index._filename))
    # copy the data file
    data_file = self._data._open_file()
    try:
        transport.put_file(name + DATA_SUFFIX, data_file)
    finally:
        data_file.close()
    # move the copied index into place
    transport.move(staging, index_target)

501

502

def create_empty(self, name, transport, mode=None):
    """Create a new knit that shares this knit's factory and delta flag.

    mode is accepted but not used by this implementation.
    """
    return KnitVersionedFile(name, transport, create=True,
                             factory=self.factory, delta=self.delta)

505

506

def _fix_parents(self, version_id, new_parents):

507

"""Fix the parents list for version.

508

509

This is done by appending a new version to the index

510

with identical data except for the parents list.

511

the parents list must be a superset of the current

512

list.

513

"""

514

current_values = self._index._cache[version_id]

515

assert set(current_values[4]).difference(set(new_parents)) == set()

516

self._index.add_version(version_id,

517

current_values[1],

518

current_values[2],

519

current_values[3],

520

new_parents)

521

522

def get_delta(self, version_id):
    """Get a delta for constructing version from some other version.

    :return: (parent, sha1, noeol, delta) where parent is the first
        parent of version_id, or None when it has no parents.
    :raises RevisionNotPresent: If version_id is not in this knit.
    """
    version_id = osutils.safe_revision_id(version_id)
    self.check_not_reserved_id(version_id)
    if not self.has_version(version_id):
        raise RevisionNotPresent(version_id, self.filename)

    parents = self.get_parents(version_id)
    parent = None
    if len(parents):
        parent = parents[0]
    data_pos, data_size = self._index.get_position(version_id)
    wanted = ((version_id, data_pos, data_size),)
    data, sha1 = self._data.read_records(wanted)[version_id]
    noeol = 'no-eol' in self._index.get_options(version_id)
    if 'fulltext' != self._index.get_method(version_id):
        # Already stored as a delta: just parse it.
        return parent, sha1, noeol, self.factory.parse_line_delta(
            data, version_id)

    # Stored as a fulltext: diff it against the first parent's text (or
    # against nothing when there is no parent).
    new_content = self.factory.parse_fulltext(data, version_id)
    old_texts = []
    if parent is not None:
        old_texts = self._get_content(parent).text()
    new_texts = new_content.text()
    delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
    return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

550

551

def get_graph_with_ghosts(self):
    """See VersionedFile.get_graph_with_ghosts().

    Returns the index graph items collected into a dict.
    """
    return dict(self._index.get_graph())

555

556

def get_sha1(self, version_id):
    """See VersionedFile.get_sha1().

    The digest is taken from the record map entry of version_id.
    """
    revision = osutils.safe_revision_id(version_id)
    records = self._get_record_map([revision])
    method, content, digest, build_parent = records[revision]
    return digest

562

563

@staticmethod
def get_suffixes():
    """See VersionedFile.get_suffixes().

    The data suffix is listed first, then the index suffix.
    """
    suffixes = [DATA_SUFFIX, INDEX_SUFFIX]
    return suffixes

567

568

def has_ghost(self, version_id):
    """True if there is a ghost reference in the file to version_id.

    A ghost is a version that some entry names as a parent but that is
    not itself present in the index cache.
    """
    version_id = osutils.safe_revision_id(version_id)
    # maybe we have it
    if self.has_version(version_id):
        return False
    # optimisable if needed by memoising the _ghosts set.
    for node, parents in self._index.get_graph():
        for parent in parents:
            if parent not in self._index._cache and parent == version_id:
                return True
    return False

582

583

def versions(self):
    """See VersionedFile.versions.

    The answer comes straight from the index.
    """
    index = self._index
    return index.get_versions()

586

587

def has_version(self, version_id):
    """See VersionedFile.has_version.

    version_id is normalised with osutils.safe_revision_id before the
    index is consulted.
    """
    revision = osutils.safe_revision_id(version_id)
    return self._index.has_version(revision)

__contains__ = has_version

593

594

def _merge_annotations(self, content, parents, parent_texts={},

595

delta=None, annotated=None):

596

"""Merge annotations for content. This is done by comparing

597

the annotations based on changed to the text.

598

"""

599

if annotated:

600

delta_seq = None

601

for parent_id in parents:

602

merge_content = self._get_content(parent_id, parent_texts)

603

seq = patiencediff.PatienceSequenceMatcher(

604

None, merge_content.text(), content.text())

605

if delta_seq is None:

606

# setup a delta seq to reuse.

607

delta_seq = seq

608

for i, j, n in seq.get_matching_blocks():

609

if n == 0:

610

continue

611

# this appears to copy (origin, text) pairs across to the new

612

# content for any line that matches the last-checked parent.

613

# FIXME: save the sequence control data for delta compression

614

# against the most relevant parent rather than rediffing.

615

content._lines[j:j+n] = merge_content._lines[i:i+n]

616

if delta:

617

if not annotated:

618

reference_content = self._get_content(parents[0], parent_texts)

619

new_texts = content.text()

620

old_texts = reference_content.text()

621

delta_seq = patiencediff.PatienceSequenceMatcher(

622

None, old_texts, new_texts)

623

return self._make_line_delta(delta_seq, content)

624

625

def _make_line_delta(self, delta_seq, new_content):

626

"""Generate a line delta from delta_seq and new_content."""

627

diff_hunks = []

628

for op in delta_seq.get_opcodes():

629

if op[0] == 'equal':

630

continue

631

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

632

return diff_hunks

633

634

def _get_components_positions(self, version_ids):

635

"""Produce a map of position data for the components of versions.

636

637

This data is intended to be used for retrieving the knit records.

638

639

A dict of version_id to (method, data_pos, data_size, next) is

640

returned.

641

method is the way referenced data should be applied.

642

data_pos is the position of the data in the knit.

643

data_size is the size of the data in the knit.

644

next is the build-parent of the version, or None for fulltexts.

645

"""

646

component_data = {}

647

for version_id in version_ids:

648

cursor = version_id

649

650

while cursor is not None and cursor not in component_data:

651

method = self._index.get_method(cursor)

652

if method == 'fulltext':

653

next = None

654

else:

655

next = self.get_parents(cursor)[0]

656

data_pos, data_size = self._index.get_position(cursor)

657

component_data[cursor] = (method, data_pos, data_size, next)

658

cursor = next

659

return component_data

660

661

def _get_content(self, version_id, parent_texts={}):

662

"""Returns a content object that makes up the specified

663

version."""

664

if not self.has_version(version_id):

665

raise RevisionNotPresent(version_id, self.filename)

666

667

cached_version = parent_texts.get(version_id, None)

668

if cached_version is not None:

669

return cached_version

670

671

text_map, contents_map = self._get_content_maps([version_id])

672

return contents_map[version_id]

673

674

def _check_versions_present(self, version_ids):

675

"""Check that all specified versions are present."""

676

self._index.check_versions_present(version_ids)

677

678

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

679

"""See VersionedFile.add_lines_with_ghosts()."""

680

self._check_add(version_id, lines)

681

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

682

683

def _add_lines(self, version_id, parents, lines, parent_texts):

684

"""See VersionedFile.add_lines."""

685

self._check_add(version_id, lines)

686

self._check_versions_present(parents)

687

return self._add(version_id, lines[:], parents, self.delta, parent_texts)

688

689

def _check_add(self, version_id, lines):
    """check that version_id and lines are safe to add.

    :raises InvalidRevisionId: If version_id contains whitespace.
    :raises RevisionAlreadyPresent: If version_id is already in the knit.
    """
    assert self.writable, "knit is not opened for write"
    ### FIXME escape. RBC 20060228
    if contains_whitespace(version_id):
        raise InvalidRevisionId(version_id, self.filename)
    self.check_not_reserved_id(version_id)
    already_present = self.has_version(version_id)
    if already_present:
        raise RevisionAlreadyPresent(version_id, self.filename)
    for check_lines in (self._check_lines_not_unicode,
                        self._check_lines_are_lines):
        check_lines(lines)

700

701

def _add(self, version_id, lines, parents, delta, parent_texts):
    """Add a set of lines on top of version specified by parents.

    If delta is true, compress the text as a line-delta against
    the first parent.

    Any versions not present will be converted into ghosts.

    :param version_id: Id of the new version; must be a str.
    :param lines: The text as a list of lines.  The list is modified in
        place when the last line lacks a trailing newline, so callers
        pass a copy.
    :param parents: Parent version ids; they are recorded in the index
        whether or not they are present.
    :param delta: Whether a line-delta may be stored.
    :param parent_texts: Optional dict of version id to content object
        used when merging annotations; None means no cached texts.
    :return: The content object built by the factory for the new text.
    """
    # 461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
    # +400   0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
    # +461   0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
    # +461   0    193.3940     41.5720   +bzrlib.knit:898(add_version)
    # +461   0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
    # +461   0     36.3420     15.4540   +bzrlib.knit:146(make)
    # +1383  0      8.0370      8.0370   +<len>
    # +61    0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
    # +61    0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
    # +61    0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
    # +61    0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)

    if parent_texts is None:
        parent_texts = {}
    # Parents missing from the knit are ghosts; only the present ones can
    # serve as a delta basis or supply annotations.
    present_parents = [parent for parent in parents
                       if self.has_version(parent)]

    if delta and not len(present_parents):
        delta = False

    # The digest covers the text exactly as given, before any newline is
    # appended below.
    digest = sha_strings(lines)
    options = []
    if lines:
        # endswith() also copes with an empty last line, where indexing
        # its last character raised IndexError.
        if not lines[-1].endswith('\n'):
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

    if len(present_parents) and delta:
        # To speed the extract of texts the delta chain is limited
        # to a fixed number of deltas.  This should minimize both
        # I/O and the time spend applying deltas.
        delta = self._check_should_delta(present_parents)

    assert isinstance(version_id, str)
    lines = self.factory.make(lines, version_id)
    if delta or (self.factory.annotated and len(present_parents) > 0):
        # Merge annotations from parent texts if so is needed.
        delta_hunks = self._merge_annotations(lines, present_parents,
                                              parent_texts, delta,
                                              self.factory.annotated)

    if delta:
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta_hunks)
    else:
        options.append('fulltext')
        store_lines = self.factory.lower_fulltext(lines)

    where, size = self._data.add_record(version_id, digest, store_lines)
    self._index.add_version(version_id, options, where, size, parents)
    return lines

764

765

def check(self, progress_bar=None):
    """See VersionedFile.check().

    This implementation performs no checks and returns None.
    """
    return None

767

768

def _clone_text(self, new_version_id, old_version_id, parents):

769

"""See VersionedFile.clone_text()."""

770

# FIXME RBC 20060228 make fast by only inserting an index with null

771

# delta.

772

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

773

774

def get_lines(self, version_id):
    """See VersionedFile.get_lines().

    Implemented as a one-element call to get_line_list.
    """
    line_lists = self.get_line_list([version_id])
    return line_lists[0]

777

778

def _get_record_map(self, version_ids):

779

"""Produce a dictionary of knit records.

780

781

The keys are version_ids, the values are tuples of (method, content,

782

digest, next).

783

method is the way the content should be applied.

784

content is a KnitContent object.

785

digest is the SHA1 digest of this version id after all steps are done

786

next is the build-parent of the version, i.e. the leftmost ancestor.

787

If the method is fulltext, next will be None.

788

"""

789

position_map = self._get_components_positions(version_ids)

790

# c = component_id, m = method, p = position, s = size, n = next

791

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

792

record_map = {}

793

for component_id, content, digest in \

794

self._data.read_records_iter(records):

795

method, position, size, next = position_map[component_id]

796

record_map[component_id] = method, content, digest, next

797

798

return record_map

799

800

def get_text(self, version_id):
    """See VersionedFile.get_text

    Implemented as a one-element call to get_texts.
    """
    texts = self.get_texts([version_id])
    return texts[0]

803

804

def get_texts(self, version_ids):
    """Return each listed version as one string, in the order given."""
    texts = []
    for line_list in self.get_line_list(version_ids):
        texts.append(''.join(line_list))
    return texts

806

807

def get_line_list(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    One entry is returned per requested version, in request order; each
    entry is that version's text as produced by _get_content_maps.
    """
    revisions = []
    for version_id in version_ids:
        revisions.append(osutils.safe_revision_id(version_id))
    for revision in revisions:
        self.check_not_reserved_id(revision)
    text_map, content_map = self._get_content_maps(revisions)
    return [text_map[revision] for revision in revisions]

814

815

def _get_content_maps(self, version_ids):
    """Produce maps of text and KnitContents

    :return: (text_map, content_map) where text_map contains the texts for
    the requested versions and content_map contains the KnitContents.
    Both dicts take version_ids as their keys.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    :raises KnitCorrupt: If a rebuilt text does not match its digest.
    """
    for version_id in version_ids:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    record_map = self._get_record_map(version_ids)

    text_map = {}
    # Content of every component built so far, before any no-eol fixup,
    # so that shared build chains are only rebuilt once.
    built = {}
    final_content = {}
    for version_id in version_ids:
        # Collect the build chain, newest first, stopping at the first
        # component whose content is already known.
        chain = []
        cursor = version_id
        while cursor is not None:
            method, data, digest, build_parent = record_map[cursor]
            chain.append((cursor, method, data, digest))
            if cursor in built:
                break
            cursor = build_parent

        # Replay the chain oldest first.
        content = None
        for component_id, method, data, digest in reversed(chain):
            if component_id in built:
                content = built[component_id]
                continue
            if method == 'fulltext':
                assert content is None
                content = self.factory.parse_fulltext(data, version_id)
            elif method == 'line-delta':
                delta = self.factory.parse_line_delta(data, version_id)
                content = content.copy()
                content._lines = self._apply_delta(content._lines,
                                                   delta)
            built[component_id] = content

        if 'no-eol' in self._index.get_options(version_id):
            # Work on a copy so the shared content keeps its newline.
            content = content.copy()
            origin, last_text = content._lines[-1]
            content._lines[-1] = (origin, last_text.rstrip('\n'))
        final_content[version_id] = content

        # digest here is the digest from the last applied component.
        text = content.text()
        if sha_strings(text) != digest:
            raise KnitCorrupt(self.filename,
                              'sha-1 does not match %s' % version_id)

        text_map[version_id] = text
    return text_map, final_content

869

870

def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                            pb=None):
    """See VersionedFile.iter_lines_added_or_present_in_versions().

    :param version_ids: Versions to walk; None means every version.
    :param pb: Progress bar to update; a DummyProgress is used when None.
    :raises RevisionNotPresent: If a requested version is not in the knit.
    """
    if version_ids is None:
        version_ids = self.versions()
    else:
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
    if pb is None:
        pb = progress.DummyProgress()
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need version_id, position, length
    requested_versions = set(version_ids)
    # filter for available versions
    for version_id in requested_versions:
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
    # get a in-component-order queue:
    version_id_records = []
    for version_id in self.versions():
        if version_id not in requested_versions:
            continue
        data_pos, length = self._index.get_position(version_id)
        version_id_records.append((version_id, data_pos, length))

    total = len(version_id_records)
    progress_message = 'Walking content.'
    record_iter = self._data.read_records_iter(version_id_records)
    for version_idx, (version_id, data, sha_value) in enumerate(record_iter):
        pb.update(progress_message, version_idx, total)
        method = self._index.get_method(version_id)

        assert method in ('fulltext', 'line-delta')
        if method == 'fulltext':
            extract = self.factory.get_fulltext_content
        else:
            extract = self.factory.get_linedelta_content
        for line in extract(data):
            yield line

    pb.update(progress_message, total, total)

909

910

def num_versions(self):
    """See VersionedFile.num_versions().

    The count is supplied by the index.
    """
    index = self._index
    return index.num_versions()

__len__ = num_versions

915

916

def annotate_iter(self, version_id):
    """See VersionedFile.annotate_iter.

    Yields an (origin, text) pair for every line of the version.
    """
    revision = osutils.safe_revision_id(version_id)
    annotated = self._get_content(revision).annotate_iter()
    for origin, text in annotated:
        yield origin, text

922

923

def get_parents(self, version_id):
    """See VersionedFile.get_parents.

    :raises RevisionNotPresent: If the index raises KeyError for the
        version.
    """
    # perf notes:
    # optimism counts!
    # 52554 calls in 1264 872 internal down from 3674
    revision = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents(revision)
    except KeyError:
        raise RevisionNotPresent(revision, self.filename)
    return parents

933

934

def get_parents_with_ghosts(self, version_id):
    """See VersionedFile.get_parents_with_ghosts.

    :raises RevisionNotPresent: If the index raises KeyError for the
        version.
    """
    revision = osutils.safe_revision_id(version_id)
    try:
        parents = self._index.get_parents_with_ghosts(revision)
    except KeyError:
        raise RevisionNotPresent(revision, self.filename)
    return parents

941

942

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    :param versions: A single version id string or a list of them.
    :param topo_sorted: Passed through to the index.
    :return: [] when no versions are given, otherwise the index's answer.
    """
    requested = versions
    if isinstance(requested, basestring):
        # A lone id is treated as a one-element list.
        requested = [requested]
    if not requested:
        return []
    revisions = [osutils.safe_revision_id(v) for v in requested]
    return self._index.get_ancestry(revisions, topo_sorted)

950

951

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    :param versions: A single version id string or a list of them.
    :return: [] when no versions are given, otherwise the index's answer.
    """
    requested = versions
    if isinstance(requested, basestring):
        # A lone id is treated as a one-element list.
        requested = [requested]
    if not requested:
        return []
    revisions = [osutils.safe_revision_id(v) for v in requested]
    return self._index.get_ancestry_with_ghosts(revisions)

959

960

#@deprecated_method(zero_eight)

961

def walk(self, version_ids):
    """See VersionedFile.walk.

    Yields (lineno, insert_id, dset, line) tuples as produced by walking
    a temporary Weave built from the ancestry of version_ids.
    """
    # We take the short path here, and extract all relevant texts
    # and put them in a weave and let that do all the work.  Far
    # from optimal, but is much simpler.
    # FIXME RB 20060228 this really is inefficient!
    from bzrlib.weave import Weave

    weave = Weave(self.filename)
    ancestry = set(self.get_ancestry(version_ids, topo_sorted=False))
    sorted_graph = topo_sort(self._index.get_graph())
    # Keep the topological order but only the versions we need.
    needed = [vid for vid in sorted_graph if vid in ancestry]

    for version_id in needed:
        text = self.get_lines(version_id)
        weave.add_lines(version_id, self.get_parents(version_id), text)

    for lineno, insert_id, dset, line in weave.walk(version_ids):
        yield lineno, insert_id, dset, line

980

981

def plan_merge(self, ver_a, ver_b):
    """See VersionedFile.plan_merge.

    Yields (state, text) pairs.  Lines common to both versions are
    'unchanged'.  A line found only in A is 'killed-b' when the revision
    that introduced it is in B's ancestry and 'new-a' otherwise; lines
    found only in B are 'killed-a' or 'new-b' by the mirrored rule.
    """
    ver_a = osutils.safe_revision_id(ver_a)
    ver_b = osutils.safe_revision_id(ver_b)
    ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
    ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))

    annotated_a = self.annotate(ver_a)
    annotated_b = self.annotate(ver_b)
    plain_a = [text for (origin, text) in annotated_a]
    plain_b = [text for (origin, text) in annotated_b]
    matcher = KnitSequenceMatcher(None, plain_a, plain_b)
    a_cur = 0
    b_cur = 0
    for a_start, b_start, length in matcher.get_matching_blocks():
        # process all mismatched sections
        # (last mismatched section is handled because blocks always
        # includes a 0-length last block)
        for revision, text in annotated_a[a_cur:a_start]:
            if revision in ancestors_b:
                yield 'killed-b', text
            else:
                yield 'new-a', text
        for revision, text in annotated_b[b_cur:b_start]:
            if revision in ancestors_a:
                yield 'killed-a', text
            else:
                yield 'new-b', text

        # and now the matched section
        a_cur = a_start + length
        b_cur = b_start + length
        matched = zip(plain_a[a_start:a_cur], plain_b[b_start:b_cur])
        for text_a, text_b in matched:
            assert text_a == text_b
            yield "unchanged", text_a

1021

1022

1023

class _KnitComponentFile(object):

1024

"""One of the files used to implement a knit database"""

1025

1026

def __init__(self, transport, filename, mode, file_mode=None,

1027

create_parent_dir=False, dir_mode=None):

1028

self._transport = transport

1029

self._filename = filename

1030

self._mode = mode

1031

self._file_mode = file_mode

1032

self._dir_mode = dir_mode

1033

self._create_parent_dir = create_parent_dir

1034

self._need_to_create = False

1035

1036

def _full_path(self):

1037

"""Return the full path to this file."""

1038

return self._transport.base + self._filename

1039

1040

def check_header(self, fp):

1041

line = fp.readline()

1042

if line == '':

1043

# An empty file can actually be treated as though the file doesn't

1044

# exist yet.

1045

raise errors.NoSuchFile(self._full_path())

1046

if line != self.HEADER:

1047

raise KnitHeaderError(badline=line,

1048

filename=self._transport.abspath(self._filename))

1049

1050

def commit(self):

1051

"""Commit is a nop."""

1052

1053

def __repr__(self):

1054

return '%s(%s)' % (self.__class__.__name__, self._filename)

1055

1056

1057

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    entries.

    _cache is a cache for fast mapping from version id to an index
    entry, stored as a tuple:
    (version_id, options, pos, size, parents, index).

    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id.
    If this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record. The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the record in the
        data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
        REVISION_ID.
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    partial writes:
    when a write to the index file is interrupted, it will result in a line
    that does not end in ' :'. If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted. As a result it is normal for the last line in the index to be
    missing a trailing newline. One can be added with no harmful effects.
    """

    # First line of every index file; compared verbatim by check_header().
    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

1114

1115

def _cache_version(self, version_id, options, pos, size, parents):

1116

"""Cache a version record in the history array and index cache.

1117

1118

This is inlined into _load_data for performance. KEEP IN SYNC.

1119

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

1120

indexes).

1121

"""

1122

# only want the _history index to reference the 1st index entry

1123

# for version_id

1124

if version_id not in self._cache:

1125

index = len(self._history)

1126

self._history.append(version_id)

1127

else:

1128

index = self._cache[version_id][5]

1129

self._cache[version_id] = (version_id,

1130

options,

1131

pos,

1132

size,

1133

parents,

1134

index)

1135

1136

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False, dir_mode=None):
    """Open the index and load it into memory, optionally creating it.

    :param create: with mode 'w', create the index if it does not exist.
    :param delay_create: when creating, postpone writing the file until
        the first versions are added.
    Other parameters are handed to _KnitComponentFile.__init__.
    :raises NoSuchFile: if the index is missing (or empty) and it may not
        be created.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    # position in _history is the 'official' index for a revision
    # but the values may have come from a newer entry.
    # so - wc -l of a knit index is != the number of unique names
    # in the knit.
    self._history = []
    self._cache = {}
    try:
        index_file = self._transport.get(self._filename)
        try:
            # _load_data may raise NoSuchFile if the target knit is
            # completely empty.
            self._load_data(index_file)
        finally:
            index_file.close()
    except NoSuchFile:
        may_create = (mode == 'w') and create
        if not may_create:
            raise
        if delay_create:
            self._need_to_create = True
        else:
            self._transport.put_bytes_non_atomic(
                self._filename, self.HEADER, mode=self._file_mode)

1164

1165

def _load_data(self, fp):

1166

cache = self._cache

1167

history = self._history

1168

1169

self.check_header(fp)

1170

# readlines reads the whole file at once:

1171

# bad for transports like http, good for local disk

1172

# we save 60 ms doing this one change (

1173

# from calling readline each time to calling

1174

# readlines once.

1175

# probably what we want for nice behaviour on

1176

# http is a incremental readlines that yields, or

1177

# a check for local vs non local indexes,

1178

history_top = len(history) - 1

1179

for line in fp.readlines():

1180

rec = line.split()

1181

if len(rec) < 5 or rec[-1] != ':':

1182

# corrupt line.

1183

# FIXME: in the future we should determine if its a

1184

# short write - and ignore it

1185

# or a different failure, and raise. RBC 20060407

1186

continue

1187

1188

try:

1189

parents = []

1190

for value in rec[4:-1]:

1191

if value[0] == '.':

1192

# uncompressed reference

1193

parent_id = value[1:]

1194

else:

1195

parent_id = history[int(value)]

1196

parents.append(parent_id)

1197

except (IndexError, ValueError), e:

1198

# The parent could not be decoded to get its parent row. This

1199

# at a minimum will cause this row to have wrong parents, or

1200

# even to apply a delta to the wrong base and decode

1201

# incorrectly. its therefore not usable, and because we have

1202

# encountered a situation where a new knit index had this

1203

# corrupt we can't asssume that no other rows referring to the

1204

# index of this record actually mean the subsequent uncorrupt

1205

# one, so we error.

1206

raise errors.KnitCorrupt(self._filename,

1207

"line %r: %s" % (rec, e))

1208

1209

version_id, options, pos, size = rec[:4]

1210

version_id = version_id

1211

1212

# See self._cache_version

1213

# only want the _history index to reference the 1st

1214

# index entry for version_id

1215

if version_id not in cache:

1216

history_top += 1

1217

index = history_top

1218

history.append(version_id)

1219

else:

1220

index = cache[version_id][5]

1221

cache[version_id] = (version_id,

1222

options.split(','),

1223

int(pos),

1224

int(size),

1225

parents,

1226

index)

1227

# end self._cache_version

1228

1229

def get_graph(self):
    """Return a list of (version_id, parents) pairs for every cached version.

    The parents are taken as stored, so ids not present in the index
    (ghosts) are included.
    """
    graph = []
    for version_id, entry in self._cache.iteritems():
        graph.append((version_id, entry[4]))
    return graph

1231

1232

def get_ancestry(self, versions, topo_sorted=True):
    """See VersionedFile.get_ancestry.

    Walks the parent links of the requested versions, dropping parents
    that are not in the index (ghosts).

    :param versions: iterable of version ids to start from.
    :param topo_sorted: if false, return the ancestry in arbitrary order
        and skip the topological sort.
    :raises RevisionNotPresent: if a requested version is not in the index.
    """
    cache = self._cache
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        try:
            recorded_parents = cache[version][4]
        except KeyError:
            raise RevisionNotPresent(version, self._filename)
        # trim ghosts
        present = [p for p in recorded_parents if p in cache]
        # queue every parent whose own parents have not been recorded yet
        todo.update([p for p in present if p not in graph])
        graph[version] = present
    if topo_sorted:
        return topo_sort(graph.items())
    return graph.keys()

1251

1252

def get_ancestry_with_ghosts(self, versions):
    """See VersionedFile.get_ancestry_with_ghosts.

    The requested versions must all be present; parents that are missing
    from the index are kept in the result as parentless nodes.

    :return: the topologically sorted ancestry, ghosts included.
    """
    self.check_versions_present(versions)
    cache = self._cache
    graph = {}
    todo = set(versions)
    while todo:
        version = todo.pop()
        if version not in cache:
            # ghost, fake it
            graph[version] = []
            continue
        parents = cache[version][4]
        # queue every parent that has not been recorded yet
        todo.update([p for p in parents if p not in graph])
        graph[version] = parents
    return topo_sort(graph.items())

1271

1272

def num_versions(self):
    """Return how many distinct version ids the index holds."""
    history = self._history
    return len(history)

# len(index) is the same count.
__len__ = num_versions

1276

1277

def get_versions(self):
    """Return the version ids in sequence-number order.

    The list returned is the index's own _history list, not a copy.
    """
    history = self._history
    return history

1279

1280

def idx_to_name(self, idx):
    """Return the version id stored at sequence number idx."""
    history = self._history
    return history[idx]

1282

1283

def lookup(self, version_id):
    """Return the sequence number of version_id, which must be present."""
    cache = self._cache
    assert version_id in cache
    entry = cache[version_id]
    return entry[5]

1286

1287

def _version_list_to_index(self, versions):

1288

result_list = []

1289

cache = self._cache

1290

for version in versions:

1291

if version in cache:

1292

# -- inlined lookup() --

1293

result_list.append(str(cache[version][5]))

1294

# -- end lookup () --

1295

else:

1296

result_list.append('.' + version)

1297

return ' '.join(result_list)

1298

1299

def add_version(self, version_id, options, pos, size, parents):
    """Add a single version record to the index via add_versions."""
    record = (version_id, options, pos, size, parents)
    self.add_versions((record,))

1302

1303

def add_versions(self, versions):
    """Add multiple versions to the index.

    All records are formatted and cached first, then written in a single
    transport call.  If anything fails, the in-memory history and cache
    are put back to their state on entry and the error is re-raised.

    :param versions: a list of tuples:
                     (version_id, options, pos, size, parents).
    """
    saved_history = self._history[:]
    saved_cache = self._cache.copy()
    new_lines = []

    try:
        for version_id, options, pos, size, parents in versions:
            flags = ','.join(options)
            parent_refs = self._version_list_to_index(parents)
            # Leading newline: records always start on a fresh line, even
            # after an interrupted write.
            line = "\n%s %s %s %s %s :" % (version_id, flags, pos, size,
                                           parent_refs)
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            new_lines.append(line)
            self._cache_version(version_id, options, pos, size, parents)
        if self._need_to_create:
            # Deferred creation: write header and records as a new file.
            sio = StringIO()
            sio.write(self.HEADER)
            sio.writelines(new_lines)
            sio.seek(0)
            self._transport.put_file_non_atomic(self._filename, sio,
                                create_parent_dir=self._create_parent_dir,
                                mode=self._file_mode,
                                dir_mode=self._dir_mode)
            self._need_to_create = False
        else:
            self._transport.append_bytes(self._filename, ''.join(new_lines))
    except:
        # If any problems happen, restore the original values and re-raise
        self._history = saved_history
        self._cache = saved_cache
        raise

1341

1342

def has_version(self, version_id):
    """Return True if version_id has an entry in the index."""
    cache = self._cache
    return version_id in cache

1345

1346

def get_position(self, version_id):
    """Return the (position, size) of version_id's record in the data file.

    :raises KeyError: if version_id is not in the index.
    """
    pos, size = self._cache[version_id][2:4]
    return pos, size

1350

1351

def get_method(self, version_id):
    """Return the compression method recorded for version_id.

    :return: 'fulltext' or 'line-delta'; 'fulltext' wins if both flags
        are present.
    :raises errors.KnitIndexUnknownMethod: if neither flag is recorded.
    """
    options = self._cache[version_id][1]
    if 'fulltext' in options:
        return 'fulltext'
    if 'line-delta' in options:
        return 'line-delta'
    raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1360

1361

def get_options(self, version_id):
    """Return the list of flags recorded for version_id."""
    entry = self._cache[version_id]
    return entry[1]

1363

1364

def get_parents(self, version_id):
    """Return the parents of version_id that are present in the index.

    Parents without an index entry (ghosts) are left out; the recorded
    order is kept.
    """
    present = []
    for parent in self._cache[version_id][4]:
        if parent in self._cache:
            present.append(parent)
    return present

1368

1369

def get_parents_with_ghosts(self, version_id):
    """Return the parents recorded for version_id, ghosts included."""
    entry = self._cache[version_id]
    return entry[4]

1372

1373

def check_versions_present(self, version_ids):
    """Verify that every id in version_ids has an index entry.

    :raises RevisionNotPresent: for the first id that is missing.
    """
    known = self._cache
    for version_id in version_ids:
        if version_id in known:
            continue
        raise RevisionNotPresent(version_id, self._filename)

1379

1380

1381

class _KnitData(_KnitComponentFile):
    """Contents of the knit data file.

    Each record is a gzip-compressed block holding a
    "version <id> <line count> <digest>" header line, the content lines
    and an "end <id>" trailer (see _record_to_data).  Records are read
    back by (position, size) through the transport's readv.

    Raw records can optionally be kept in an in-memory dict keyed by
    version id; see enable_cache and clear_cache.
    """

1383

1384

def __init__(self, transport, filename, mode, create=False, file_mode=None,
             create_parent_dir=False, delay_create=False,
             dir_mode=None):
    """Set up access to the data file, optionally creating it.

    :param create: create the data file (as an empty file).
    :param delay_create: with create, postpone creation until the first
        record is written.
    Other parameters are handed to _KnitComponentFile.__init__.
    """
    _KnitComponentFile.__init__(self, transport, filename, mode,
                                file_mode=file_mode,
                                create_parent_dir=create_parent_dir,
                                dir_mode=dir_mode)
    self._checked = False
    # TODO: jam 20060713 conceptually, this could spill to disk
    #       if the cached size gets larger than a certain amount
    #       but it complicates the model a bit, so for now just use
    #       a simple dictionary
    self._cache = {}
    self._do_cache = False
    if not create:
        return
    if delay_create:
        self._need_to_create = create
    else:
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

1404

1405

def enable_cache(self):
    """Turn on caching of raw records as they are read or written."""
    setattr(self, '_do_cache', True)

1408

1409

def clear_cache(self):
    """Drop all cached records and turn caching off again."""
    # A fresh dict is bound rather than emptying the old one in place.
    self._cache = {}
    self._do_cache = False

1413

1414

def _open_file(self):

1415

try:

1416

return self._transport.get(self._filename)

1417

except NoSuchFile:

1418

pass

1419

return None

1420

1421

def _record_to_data(self, version_id, digest, lines):
    """Convert version_id, digest, lines into a raw data block.

    The block is a gzip stream made of a
    "version <version_id> <number of lines> <digest>" header line, the
    content lines themselves, and an "end <version_id>" trailer line.

    :param version_id: id of the version being stored; must be a str.
    :param digest: digest string recorded in the header line.
    :param lines: the content lines; len() is taken of it, so it must be
        a sequence rather than a one-shot iterator.
    :return: (len, a StringIO instance with the raw data ready to read.)
    """
    sio = StringIO()
    data_file = GzipFile(None, mode='wb', fileobj=sio)

    assert isinstance(version_id, str)
    data_file.writelines(chain(
        ["version %s %d %s\n" % (version_id,
                                 len(lines),
                                 digest)],
        lines,
        ["end %s\n" % version_id]))
    # The gzip stream must be closed before measuring, so that tell()
    # reports the size of the complete compressed block.
    data_file.close()
    length= sio.tell()

    # Rewind so the caller can read the block from its start.
    sio.seek(0)
    return length, sio

1441

1442

def add_raw_record(self, raw_data):
    """Append an already prepared record to the data file.

    If creation of the file was deferred, the file is created now with
    raw_data as its whole content.

    :param raw_data: the record as a plain byte string.
    :return: the offset in the data file raw_data was written.
    """
    assert isinstance(raw_data, str), 'data must be plain bytes'
    if self._need_to_create:
        self._transport.put_bytes_non_atomic(self._filename, raw_data,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
        self._need_to_create = False
        # First record of a brand new file.
        return 0
    return self._transport.append_bytes(self._filename, raw_data)

1457

1458

def add_record(self, version_id, digest, lines):
    """Compress a new text record and write it to the data file.

    If creation of the file was deferred, the file is created now with
    this record as its content.  With caching enabled the raw record is
    also remembered under version_id.

    :return: (start position of the record in the file, size of the
        record).
    """
    size, sio = self._record_to_data(version_id, digest, lines)
    # write to disk
    if self._need_to_create:
        self._transport.put_file_non_atomic(self._filename, sio,
                               create_parent_dir=self._create_parent_dir,
                               mode=self._file_mode,
                               dir_mode=self._dir_mode)
        self._need_to_create = False
        start_pos = 0
    else:
        start_pos = self._transport.append_file(self._filename, sio)
    if self._do_cache:
        self._cache[version_id] = sio.getvalue()
    return start_pos, size

1475

1476

def _parse_record_header(self, version_id, raw_data):
    """Parse a record header for consistency.

    Only the first line of the compressed record is read and checked
    (see _check_header); the rest of the stream is left unread.

    :param version_id: the version the record is expected to hold.
    :param raw_data: the compressed record as read from the data file.
    :return: the header and the decompressor stream.
             as (stream, header_record)
    :raises KnitCorrupt: if reading or checking the header fails for any
        reason; the original exception type and text go into the message.
    """
    df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
    try:
        rec = self._check_header(version_id, df.readline())
    except Exception, e:
        # Note: this also re-wraps a KnitCorrupt raised by _check_header.
        raise KnitCorrupt(self._filename,
                          "While reading {%s} got %s(%s)"
                          % (version_id, e.__class__.__name__, str(e)))
    # The caller is responsible for closing df.
    return df, rec

1490

1491

def _check_header(self, version_id, line):

1492

rec = line.split()

1493

if len(rec) != 4:

1494

raise KnitCorrupt(self._filename,

1495

'unexpected number of elements in record header')

1496

if rec[1] != version_id:

1497

raise KnitCorrupt(self._filename,

1498

'unexpected version, wanted %r, got %r'

1499

% (version_id, rec[1]))

1500

return rec

1501

1502

def _parse_record(self, version_id, data):
    """Decompress a record and return its content lines and digest.

    :param version_id: the version the record is expected to hold.
    :param data: the compressed record as read from the data file.
    :return: (list of content lines, digest field from the header).
    :raises KnitCorrupt: if decompression fails, the header does not
        match (see _check_header), the number of lines differs from the
        count in the header, or the trailer line is wrong.
    """
    # profiling notes:
    # 4168 calls in 2880 217 internal
    # 4168 calls to _parse_record_header in 2121
    # 4168 calls to readlines in 330
    df = GzipFile(mode='rb', fileobj=StringIO(data))

    try:
        record_contents = df.readlines()
    except Exception, e:
        raise KnitCorrupt(self._filename,
                          "While reading {%s} got %s(%s)"
                          % (version_id, e.__class__.__name__, str(e)))
    # First line is the header; what remains after also removing the
    # trailer below is the content proper.
    # NOTE(review): a record that decompresses to fewer than two lines
    # makes one of these pops raise IndexError rather than KnitCorrupt.
    header = record_contents.pop(0)
    rec = self._check_header(version_id, header)

    last_line = record_contents.pop()
    if len(record_contents) != int(rec[2]):
        raise KnitCorrupt(self._filename,
                          'incorrect number of lines %s != %s'
                          ' for version {%s}'
                          % (len(record_contents), int(rec[2]),
                             version_id))
    if last_line != 'end %s\n' % rec[1]:
        raise KnitCorrupt(self._filename,
                          'unexpected version end line %r, wanted %r'
                          % (last_line, version_id))
    # Only reached on success; df is not closed on the error paths above.
    df.close()
    return record_contents, rec[3]

1531

1532

def read_records_iter_raw(self, records):
    """Yield (version_id, raw record data) for each requested record.

    Records are yielded in the order requested.  Data fetched from the
    transport only has its header unpacked, to confirm it belongs to the
    expected version; data served from the cache is not re-checked.

    :param records: a sequence of (version_id, pos, size) entries.
    """
    # setup an iterator of the external records:
    # uses readv so nice and fast we hope.
    if len(records):
        # grab the disk data needed.
        needed_offsets = []
        if self._cache:
            for version_id, pos, size in records:
                if version_id not in self._cache:
                    needed_offsets.append((pos, size))
        else:
            # Don't check _cache if it is empty
            for version_id, pos, size in records:
                needed_offsets.append((pos, size))

        raw_records = self._transport.readv(self._filename, needed_offsets)

    for version_id, pos, size in records:
        if version_id not in self._cache:
            pos, data = raw_records.next()
            if self._do_cache:
                self._cache[version_id] = data

            # validate the header
            stream, header = self._parse_record_header(version_id, data)
            stream.close()
        else:
            # This data has already been validated
            data = self._cache[version_id]
        yield version_id, data

1566

1567

def read_records_iter(self, records):
    """Read text records from the data file and yield them parsed.

    Cached records are yielded first, as they are met in the request;
    the remainder are fetched through the transport sorted by position.
    Multiple requests for the same record only yield one response.

    :param records: A list of (version_id, pos, len) entries
    :return: Yields (version_id, contents, digest) in the order
             read, not the order requested
    """
    if not records:
        return

    by_position = operator.itemgetter(1)
    if not self._cache:
        to_fetch = sorted(set(records), key=by_position)
    else:
        # Skip records we have already seen
        served = set()
        missing = set()
        for record in records:
            version_id = record[0]
            if version_id not in self._cache:
                missing.add(record)
                continue
            if version_id in served:
                continue
            served.add(version_id)
            data = self._cache[version_id]
            content, digest = self._parse_record(version_id, data)
            yield (version_id, content, digest)
        to_fetch = sorted(missing, key=by_position)

    if not to_fetch:
        return

    # The transport optimizes the fetching as well
    # (ie, reads continuous ranges.)
    offsets = [(pos, size) for version_id, pos, size in to_fetch]
    readv_response = self._transport.readv(self._filename, offsets)

    for (version_id, pos, size), (pos, data) in \
            izip(iter(to_fetch), readv_response):
        content, digest = self._parse_record(version_id, data)
        if self._do_cache:
            self._cache[version_id] = data
        yield version_id, content, digest

1612

1613

def read_records(self, records):
    """Read records into a dictionary.

    :param records: a list of (version_id, pos, len) entries, as taken
        by read_records_iter.
    :return: dict mapping version id to (content, digest).
    """
    return dict([(record_id, (content, digest))
                 for record_id, content, digest
                 in self.read_records_iter(records)])

1620

1621

1622

class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits.

        :return: True if both source and target are KnitVersionedFile
            instances.
        """
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Copies the raw (still compressed) records of the versions the
        target lacks from the source, then merges the parent lists of
        versions present on both sides whose recorded parents differ.

        NOTE(review): the pb argument is overwritten with a freshly
        created nested progress bar, and msg is not used in this method.

        :return: the number of records copied.
        :raises errors.GraphCycleError: if taking a parent from the source
            would make a version its own ancestor.
        """
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order of the source while restricting
            # to the versions that actually have to be copied.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must :
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                data_pos, data_size = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            # The raw reader yields in request order, so it can be walked
            # in step with copy_queue; the assert below checks that.
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            # All copied records are handed to the target in one call.
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1734

1735

1736

# Register the knit-to-knit code path with InterVersionedFile.
# NOTE(review): presumably is_compatible() is consulted when an optimiser is
# chosen for a source/target pair -- confirm in InterVersionedFile.
InterVersionedFile.register_optimiser(InterKnit)

1737

1738

1739

class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits.

        :return: True if source is a bzrlib.weave.Weave and target is a
            KnitVersionedFile.
        """
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join.

        Adds the texts of the versions the target lacks, one add_lines
        call per version in topological order, then extends the parent
        lists of versions present on both sides for which the source
        records parents the target does not have.

        NOTE(review): the pb argument is overwritten with a freshly
        created nested progress bar, and msg is not used in this method.

        :return: the number of versions added to the target.
        :raises errors.GraphCycleError: if taking a parent from the source
            would make a version its own ancestor.
        """
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then its fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            # Keep the topological order of the source while restricting
            # to the versions that actually have to be added.
            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                # Progress is reported before the version is converted.
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that its will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

1827

1828

1829

# Register the weave-to-knit code path with InterVersionedFile.
# NOTE(review): presumably is_compatible() is consulted when an optimiser is
# chosen for a source/target pair -- confirm in InterVersionedFile.
InterVersionedFile.register_optimiser(WeaveToKnit)

1830

1831

1832

class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.  Only find_longest_match is overridden; every
    other method is inherited from difflib.SequenceMatcher.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        :param alo: first index of the slice of self.a to search (inclusive).
        :param ahi: end index of the slice of self.a to search (exclusive).
        :param blo: first index of the slice of self.b to search (inclusive).
        :param bhi: end index of the slice of self.b to search (exclusive).
        :return: a plain tuple (i, j, k), as described below.

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = KnitSequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = KnitSequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = KnitSequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j = self.a, self.b, self.b2j
        try:
            isbjunk = self.isbjunk
        except AttributeError:
            # difflib versions that no longer provide the isbjunk helper
            # expose the junk elements of b as the set self.bjunk instead;
            # membership in that set answers the same question.
            isbjunk = self.bjunk.__contains__
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # range() rather than xrange() so the loop runs on interpreters
        # that do not define xrange; the values iterated are identical.
        for i in range(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            # j2len is rebound at the end of every iteration, so its bound
            # get method has to be looked up again here.
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced
            # the following improvement
            # 704  0  4650.5320  2620.7410  bzrlib.knit:1336(find_longest_match)
            # +326674  0  1655.1210  1655.1210  +<method 'get' of 'dict' objects>
            # +76519   0   374.6700   374.6700  +<method 'has_key' of 'dict' objects>
            # to
            # 704  0  3733.2820  2209.6520  bzrlib.knit:1336(find_longest_match)
            # +211400  0  1147.3520  1147.3520  +<method 'get' of 'dict' objects>
            # +76519   0   376.2780   376.2780  +<method 'has_key' of 'dict' objects>
            try:
                js = b2j[a[i]]
            except KeyError:
                # a[i] has no entry in b2j: nothing can match at this i.
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j < blo:
                        continue
                    if j >= bhi:
                        # stopping here (as the original loop did) relies on
                        # js being in increasing order.
                        break
                    k = newj2len[j] = j2lenget(j - 1, 0) + 1
                    if k > bestsize:
                        besti, bestj, bestsize = i - k + 1, j - k + 1, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while (besti > alo and bestj > blo and
               not isbjunk(b[bestj - 1]) and
               a[besti - 1] == b[bestj - 1]):
            besti, bestj, bestsize = besti - 1, bestj - 1, bestsize + 1
        while (besti + bestsize < ahi and bestj + bestsize < bhi and
               not isbjunk(b[bestj + bestsize]) and
               a[besti + bestsize] == b[bestj + bestsize]):
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while (besti > alo and bestj > blo and
               isbjunk(b[bestj - 1]) and
               a[besti - 1] == b[bestj - 1]):
            besti, bestj, bestsize = besti - 1, bestj - 1, bestsize + 1
        while (besti + bestsize < ahi and bestj + bestsize < bhi and
               isbjunk(b[bestj + bestsize]) and
               a[besti + bestsize] == b[bestj + bestsize]):
            bestsize += 1

        return besti, bestj, bestsize

Older »