~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to doc/developers/diff.txt

Committer: Canonical.com Patch Queue Manager
Date: 2007-06-18 05:22:35 UTC
mfrom: (1551.15.27 Aaron's mergeable stuff)
Revision ID: pqm@pqm.ubuntu.com-20070618052235-mvns8j28szyzscy0

Turn list-weave into list-versionedfile

files added:
bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

files removed:
bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/api.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/bundle/serializer/v4.py

bzrlib/email_message.py

bzrlib/file_names.py

bzrlib/graph.py

bzrlib/index.py

bzrlib/multiparent.py

bzrlib/pack.py

bzrlib/plugins/multiparent.py

bzrlib/smtp_connection.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_file_names.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_index.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/transport_util.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/transport/brokenrename.py

bzrlib/transport/unlistable.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

doc/bug_trackers.txt

doc/conflicts.txt

doc/developers/api-versioning.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/diff.txt

doc/developers/dirstate.txt

doc/developers/indices.txt

doc/developers/performance-contributing.txt

doc/developers/planned-change-integration.txt

doc/developers/repository.txt

doc/developers/scratch.txt

doc/developers/status.txt

doc/developers/uncommit.txt

files renamed:
bzrlib/deprecated_graph.py => bzrlib/graph.py

bzrlib/tests/blackbox/test_submit.py => bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/test_deprecated_graph.py => bzrlib/tests/test_graph.py

doc/developers/commit.txt => doc/developers/performance-commit.txt

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/help_topics.py

bzrlib/hooks.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/sign_my_commits.py

bzrlib/smart/client.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/uncommit.py

bzrlib/util/bencode.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

doc/README.1st

doc/configuration.txt

doc/developers/HACKING

doc/developers/container-format.txt

doc/developers/index.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

doc/http_smart_server.txt

doc/index.txt

doc/plugins.txt

setup.py

tools/capture_tree.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

doc/developers/diff.txt

diff Performance Analysis

=========================

.. contents:: :local:

Minimal Work

------------

Reuse of historical comparisons

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A significant part of the work done by diff is sequence matching. This

scales O(n^2) with the number of lines in the file. Therefore, it

is worthwhile to avoid content comparisons as much as possible.

Our current knit format contains content comparisons, and this data can

be converted into lists of matching blocks. Other future formats such as

mpdiff may also support such conversion. So it is possible to reuse past

comparisons.

It is also possible to combine sequential comparisons. So given a comparison

of "foo" to "bar", and "bar" to "baz", it is possible to derive a comparison of

"foo" to "baz".

Reuse of historical comparisons will scale with the number of uncommon

build-parents between the two historical revisions. This will typically be

proportional to the amount of change that the file has undergone. Therefore,

in the common case, reuse of historical comparisons will scale with the

amount of change.

The downside of such reuse is that it ties the comparison to the historical

data. But given the performance improvement, it seems to be worth

consideration. Fresh comparisons can be performed if the user requests them.

It may also be possible to accelerate comparisons by including annotation data,

thus increasing the number of unique lines.

Historical Tree Against Historical Tree

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This operation should be strictly proportional to the amount of change, because

a comparison has already been done at commit time. Achieving that performance

requires the committed data to be properly structured, so that the comparison

can be extracted and combined with other comparisons. This comparison

extraction should be possible at the inventory and file-content levels.

Minimum work:

1. Extract and combine inventory comparisons

2. Extract and combine text comparisons for modified texts

Basis Against Historical Tree

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This is another case of Historical Tree Against Historical Tree.

Basis Against Basis

~~~~~~~~~~~~~~~~~~~

This is another case of Historical Tree Against Historical Tree.

Working Tree Against Basis

~~~~~~~~~~~~~~~~~~~~~~~~~~

This must scale with the number of versioned files, unless the user indicates

that only certain files should be compared.

Performance can be further improved by caching comparisons to avoid repeating

them. Caching could potentially be performed by ``diff`` and perhaps by

``merge``. Merge is aware of the relationship of a text merge's result to

the THIS value, and the THIS value is generally the basis value. So the

comparison is latent, but present. The only issue is extracting it.

The cache could be indexed by sha1sum pairs. It could also be indexed by

file-id, to facilitate removal of stale data.

Minimum work:

1. Scan working tree for modified files

2. Retrieve cached comparisons

3. Perform comparisons on files with no cached comparisons

4. Cache comparisons for files with no cached comparisons

Working Tree Against Historical Tree

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This can be structured as a comparison of working tree against basis tree,

followed by basis tree against historical tree. Therefore, it combines the

performance characteristics of "Working Tree Against Basis" with "Basis Against

Historical Tree".

Working Tree Against Working Tree

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This can be structured as two comparisons against basis, and one comparison

of basis against basis. Its performance is therefore similar to Working Tree

Against Historical Tree.

API Changes

-----------

Desired API:

- Tree.get_comparison(file_id, tree)

This probably entails:

- WorkingTree.store_comparison(file_id, revision_id, sha1, comparison)

- WorkingTree.get_comparison(file_id, revision_id, sha1)

- Repository.get_comparison(file_id, revision_id, revision_id)

- merge_comparisons(comparison, comparison)

Storage considerations

----------------------

It must be cheap (e.g. scale with number of intermediate revisions) to perform

comparison of two historical texts. It must be cheap to perform comparison of

the inventories of two historical trees.

Older »