~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Danny van Heumen
Date: 2010-03-09 21:42:11 UTC
mto: (4634.139.5 2.0)
mto: This revision was merged to the branch mainline in revision 5160.
Revision ID: danny@dannyvanheumen.nl-20100309214211-iqh42x6qcikgd9p3

Reverted now-useless TODO list.

files added:
MANIFEST.in

NEWS-template.txt

README_BDIST_RPM

bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/cache_utf8.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/counted_lock.py

bzrlib/crash.py

bzrlib/debug.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc_generate/sphinx_conf.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/location-alias.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inventory_delta.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/tag.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_revert.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_inventory

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patches_data/binary-after-normal.patch

bzrlib/tests/test_patches_data/binary.patch

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/transport_util.py

bzrlib/timestamp.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/util/_bencode_py.py

bzrlib/util/bencode.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/views.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/Bazaar-Logo-For-Manuals.png

doc/developers

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/conf.py

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/implementation-notes.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/miscellaneous-notes.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plans.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/process.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/specifications.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/Makefile

doc/en/_static

doc/en/_static/bzr icon 16.png

doc/en/_static/bzr.ico

doc/en/_static/en

doc/en/_static/en/Makefile

doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/_static/en/bzr-en-quick-reference.png

doc/en/_static/en/bzr-en-quick-reference.svg

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/index.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_static/es/Makefile

doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/_static/es/bzr-es-quick-reference.png

doc/es/_static/es/bzr-es-quick-reference.svg

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/index.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/quick-reference

doc/es/quick-reference/index.txt

doc/es/user-guide

doc/es/user-guide/index-plain.txt

doc/es/user-guide/index.txt

doc/es/user-guide/resolving_conflicts.txt

doc/es/user-guide/version_info.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/index.txt

doc/ja

doc/ja/_static

doc/ja/_static/bzr icon 16.png

doc/ja/_static/bzr.ico

doc/ja/_templates

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/index.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide

doc/ja/upgrade-guide/data_migration.txt

doc/ja/upgrade-guide/index.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/upgrade-guide/tips_and_tricks.txt

doc/ja/user-guide

doc/ja/user-guide/adv_merging.txt

doc/ja/user-guide/annotating_changes.txt

doc/ja/user-guide/bazaar_workflows.txt

doc/ja/user-guide/branching_a_project.txt

doc/ja/user-guide/browsing_history.txt

doc/ja/user-guide/bug_trackers.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/central_intro.txt

doc/ja/user-guide/configuring_bazaar.txt

doc/ja/user-guide/controlling_registration.txt

doc/ja/user-guide/core_concepts.txt

doc/ja/user-guide/distributed_intro.txt

doc/ja/user-guide/entering_commands.txt

doc/ja/user-guide/filtered_views.txt

doc/ja/user-guide/getting_help.txt

doc/ja/user-guide/hooks.txt

doc/ja/user-guide/http_smart_server.txt

doc/ja/user-guide/images

doc/ja/user-guide/images/workflows_centralized.png

doc/ja/user-guide/images/workflows_centralized.svg

doc/ja/user-guide/images/workflows_gatekeeper.png

doc/ja/user-guide/images/workflows_gatekeeper.svg

doc/ja/user-guide/images/workflows_localcommit.png

doc/ja/user-guide/images/workflows_localcommit.svg

doc/ja/user-guide/images/workflows_peer.png

doc/ja/user-guide/images/workflows_peer.svg

doc/ja/user-guide/images/workflows_pqm.png

doc/ja/user-guide/images/workflows_pqm.svg

doc/ja/user-guide/images/workflows_shared.png

doc/ja/user-guide/images/workflows_shared.svg

doc/ja/user-guide/images/workflows_single.png

doc/ja/user-guide/images/workflows_single.svg

doc/ja/user-guide/index.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/merging_changes.txt

doc/ja/user-guide/organizing_branches.txt

doc/ja/user-guide/organizing_your_workspace.txt

doc/ja/user-guide/part2_intro.txt

doc/ja/user-guide/partner_intro.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/publishing_a_branch.txt

doc/ja/user-guide/recording_changes.txt

doc/ja/user-guide/releasing_a_project.txt

doc/ja/user-guide/resolving_conflicts.txt

doc/ja/user-guide/reusing_a_checkout.txt

doc/ja/user-guide/reviewing_changes.txt

doc/ja/user-guide/sending_changes.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/setting_up_email.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/shelving_changes.txt

doc/ja/user-guide/solo_intro.txt

doc/ja/user-guide/specifying_revisions.txt

doc/ja/user-guide/stacked.txt

doc/ja/user-guide/starting_a_project.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/undoing_mistakes.txt

doc/ja/user-guide/using_aliases.txt

doc/ja/user-guide/using_checkouts.txt

doc/ja/user-guide/using_gatekeepers.txt

doc/ja/user-guide/version_info.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-guide/working_offline_central.txt

doc/ja/user-guide/writing_a_plugin.txt

doc/ja/user-guide/zen.txt

doc/ja/user-reference

doc/ja/user-reference/index.txt

doc/news-template.txt

doc/ru

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_static/ru/Makefile

doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/index.txt

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/index.txt

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index-plain.txt

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/generate_release_notes.py

tools/package_docs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/info.txt

tools/win32/run_script.py

files removed:
NEWS.developers

build-api

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/README.1st

doc/index.txt

files renamed:
bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

bzrlib/graph.py => bzrlib/deprecated_graph.py

tools/doc_generate/ => bzrlib/doc_generate/

doc/configuration.txt => bzrlib/help_topics/en/configuration.txt

bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_send.py

bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/repository_implementations/test_revprops.py => bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/tests/test_revisionspec.py

bzrlib/transport/ftp.py => bzrlib/transport/ftp/__init__.py

bzrlib/win32console.py => bzrlib/win32utils.py

bzrlib/xml5.py => bzrlib/xml8.py

HACKING => doc/developers/HACKING.txt

doc/tutorial.txt => doc/en/tutorials/tutorial.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

BRANCH.TODO

INSTALL

Makefile

NEWS

README

TODO

bzr.ico

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

doc/default.css

profile_imports.py

setup.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

trace,

)

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# Minimum number of uncompressed bytes to try fetch at once when retrieving

# groupcompress blocks.

BATCH_SIZE = 2**16

_USE_LZMA = False and (pylzma is not None)

# osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map the way groupcompress wants them.

    Keys are bucketed by prefix (the first element of a multi-element key;
    plain strings and 1-tuples share the '' bucket).  Buckets are emitted in
    sorted prefix order, and inside each bucket the keys come out in
    reverse-topological order.

    :param parent_map: A dict mapping key => parent keys
    :return: A list of all keys in groupcompress order
    """
    # Bucket the graph by prefix so each bucket can be sorted on its own.
    grouped = {}
    for key, parents in parent_map.iteritems():
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        grouped.setdefault(prefix, {})[key] = parents

    ordered_keys = []
    for prefix in sorted(grouped):
        forward_order = topo_sort(grouped[prefix])
        ordered_keys.extend(reversed(forward_order))
    return ordered_keys

# The max zlib window size is 32kB, so if we set 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should guarantee

# num_bytes coming out.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    A block holds its payload in up to three forms: compressed
    (``_z_content``), a list of uncompressed chunks (``_content_chunks``) and
    a single uncompressed string (``_content``).  ``_ensure_content`` moves
    from the first two forms to the last on demand.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        """Create an empty block; every buffer and length starts as None."""
        # map by key? or just order in file?
        self._compressor_name = None
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None
        self._content_chunks = None

    def __len__(self):
        """Return compressed length plus uncompressed length."""
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: if more bytes are requested than the block
            claims to hold, if there is nothing to decompress, if the
            compressor is unknown, or if the stream yields fewer bytes than
            requested.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._content_chunks is not None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The complete serialised block
        :param pos: Offset in bytes of the first length field (just after
            the format header)
        :raises AssertionError: if the amount of data after the length
            fields does not match the recorded compressed length.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string starting with one of GCB_KNOWN_HEADERS
        :return: A new block with the compressed content and both lengths
            populated; nothing is decompressed yet.
        :raises ValueError: if the header is not recognised.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content (not consulted by the
            extraction itself)
        :param start: Offset of the record's first byte in the uncompressed
            content
        :param end: Offset just past the record's last byte
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: if the record does not start with 'f' or 'd', or
            if its length header disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c not in ('f', 'd'):
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            # Fulltext: the payload is the text itself.
            return self._content[content_start:end]
        # Delta: the payload is instructions applied against this block.
        return apply_delta_to_source(self._content, content_start, end)

    def set_chunked_content(self, content_chunks, length):
        """Set the content of this block to the given chunks.

        :param content_chunks: A list of strings making up the content
        :param length: The total length recorded for the content
        """
        # If we have lots of short lines, it may be more efficient to join
        # the content ahead of time. If the content is <10MiB, we don't really
        # care about the extra memory consumption, so we can just pack it and
        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
        # mysql, which is below the noise margin
        self._content_length = length
        self._content_chunks = content_chunks
        self._content = None
        self._z_content = None

    def set_content(self, content):
        """Set the content of this block."""
        self._content_length = len(content)
        self._content = content
        self._z_content = None

    def _create_z_content_using_lzma(self):
        """Compress the content with pylzma, joining any chunks first."""
        if self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            raise AssertionError('Nothing to compress')
        self._z_content = pylzma.compress(self._content)
        self._z_content_length = len(self._z_content)

    def _create_z_content_from_chunks(self):
        """Compress _content_chunks with zlib, one chunk at a time."""
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
        # Build a real list: flush() output has to be appended afterwards.
        compressed_chunks = [compressor.compress(chunk)
                             for chunk in self._content_chunks]
        compressed_chunks.append(compressor.flush())
        self._z_content = ''.join(compressed_chunks)
        self._z_content_length = len(self._z_content)

    def _create_z_content(self):
        """Populate _z_content and _z_content_length if not already set."""
        if self._z_content is not None:
            return
        if _USE_LZMA:
            self._create_z_content_using_lzma()
            return
        if self._content_chunks is not None:
            self._create_z_content_from_chunks()
            return
        self._z_content = zlib.compress(self._content)
        self._z_content_length = len(self._z_content)

    def to_bytes(self):
        """Encode the information into a byte stream."""
        self._create_z_content()
        # NOTE(review): the header follows the module-level _USE_LZMA flag,
        # not self._compressor_name, even when _z_content was kept from
        # from_bytes() -- confirm the two can never disagree.
        if _USE_LZMA:
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                 ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, chose whether you
            want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: if a record has an unknown kind, a length running
            past the content, or a delta whose instructions are inconsistent
            with its recorded lengths.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                if include_text:
                    text = self._content[pos:pos+content_len]
                    result.append(('f', content_len, text))
                else:
                    result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        if include_text:
                            text = self._content[offset:offset+length]
                            delta_info.append(('c', offset, length, text))
                        else:
                            delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                                     ' extraction resulted in %d bytes'
                                     % (decomp_len, measured_len))
            pos += content_len
        return result

404

405

406

class _LazyGroupCompressFactory(object):

407

"""Yield content from a GroupCompressBlock on demand."""

408

409

def __init__(self, key, parents, manager, start, end, first):

410

"""Create a _LazyGroupCompressFactory

411

412

:param key: The key of just this record

413

:param parents: The parents of this key (possibly None)

414

:param gc_block: A GroupCompressBlock object

415

:param start: Offset of the first byte for this record in the

416

uncompressd content

417

:param end: Offset of the byte just after the end of this record

418

(ie, bytes = content[start:end])

419

:param first: Is this the first Factory for the given block?

420

"""

421

self.key = key

422

self.parents = parents

423

self.sha1 = None

424

# Note: This attribute coupled with Manager._factories creates a

425

# reference cycle. Perhaps we would rather use a weakref(), or

426

# find an appropriate time to release the ref. After the first

427

# get_bytes_as call? After Manager.get_record_stream() returns

428

# the object?

429

self._manager = manager

430

self._bytes = None

431

self.storage_kind = 'groupcompress-block'

432

if not first:

433

self.storage_kind = 'groupcompress-block-ref'

434

self._first = first

435

self._start = start

436

self._end = end

437

438

def __repr__(self):

439

return '%s(%s, first=%s)' % (self.__class__.__name__,

440

self.key, self._first)

441

442

def get_bytes_as(self, storage_kind):

443

if storage_kind == self.storage_kind:

444

if self._first:

445

# wire bytes, something...

446

return self._manager._wire_bytes()

447

else:

448

return ''

449

if storage_kind in ('fulltext', 'chunked'):

450

if self._bytes is None:

451

# Grab and cache the raw bytes for this entry

452

# and break the ref-cycle with _manager since we don't need it

453

# anymore

454

self._manager._prepare_for_extract()

455

block = self._manager._block

456

self._bytes = block.extract(self.key, self._start, self._end)

457

# There are code paths that first extract as fulltext, and then

458

# extract as storage_kind (smart fetch). So we don't break the

459

# refcycle here, but instead in manager.get_record_stream()

460

if storage_kind == 'fulltext':

461

return self._bytes

462

else:

463

return [self._bytes]

464

raise errors.UnavailableRepresentation(self.key, storage_kind,

465

self.storage_kind)

466

467

468

class _LazyGroupContentManager(object):

469

"""This manages a group of _LazyGroupCompressFactory objects."""

470

471

_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of

472

# current size, and still be considered

473

# reusable

474

_full_block_size = 4*1024*1024

475

_full_mixed_block_size = 2*1024*1024

476

_full_enough_block_size = 3*1024*1024 # size at which we won't repack

477

_full_enough_mixed_block_size = 2*768*1024 # 1.5MB

478

479

def __init__(self, block):

480

self._block = block

481

# We need to preserve the ordering

482

self._factories = []

483

self._last_byte = 0

484

485

def add_factory(self, key, parents, start, end):
    """Register one more record that lives in this manager's block.

    :param key: The key of the record
    :param parents: The parents of the record
    :param start: Offset of the record's first byte in the content
    :param end: Offset just past the record's last byte
    """
    # The very first factory added is flagged as 'first'.
    is_first = not self._factories
    # Note that this creates a reference cycle....
    new_factory = _LazyGroupCompressFactory(key, parents, self,
                                            start, end, first=is_first)
    # max() works here, but as a function call, doing a compare seems to be
    # significantly faster, timeit says 250ms for max() and 100ms for the
    # comparison
    if self._last_byte < end:
        self._last_byte = end
    self._factories.append(new_factory)

499

500

def get_record_stream(self):

501

"""Get a record for all keys added so far."""

502

for factory in self._factories:

503

yield factory

504

# Break the ref-cycle

505

factory._bytes = None

506

factory._manager = None

507

# TODO: Consider setting self._factories = None after the above loop,

508

# as it will break the reference cycle

509

510

def _trim_block(self, last_byte):
    """Swap the block for a new one holding only content[:last_byte].

    :param last_byte: Number of leading content bytes to keep
    """
    # None of the factories need to be adjusted, because the content is
    # located in an identical place. Just that some of the unreferenced
    # trailing bytes are stripped
    current = self._block
    trace.mutter('stripping trailing bytes from groupcompress block'
                 ' %d => %d', current._content_length, last_byte)
    trimmed = GroupCompressBlock()
    current._ensure_content(last_byte)
    trimmed.set_content(current._content[:last_byte])
    self._block = trimmed

521

522

def _rebuild_block(self):
    """Recompress just the referenced texts into a brand new block.

    Every factory is extracted as a fulltext and fed to a fresh
    GroupCompressor; its sha1 and offsets are updated to the values the
    compressor reports.
    """
    compressor = GroupCompressor()
    started = time.time()
    previous_length = self._block._content_length
    end_point = 0
    for factory in self._factories:
        fulltext = factory.get_bytes_as('fulltext')
        (new_sha1, new_start, end_point,
         _kind) = compressor.compress(factory.key, fulltext, factory.sha1)
        # Now update this factory with the new offsets, etc
        factory.sha1 = new_sha1
        factory._start = new_start
        factory._end = end_point
    self._last_byte = end_point
    rebuilt = compressor.flush()
    # TODO: Should we check that new_block really *is* smaller than the old
    #       block? It seems hard to come up with a method that it would
    #       expand, since we do full compression again. Perhaps based on a
    #       request that ends up poorly ordered?
    elapsed = time.time() - started
    self._block = rebuilt
    trace.mutter('creating new compressed block on-the-fly in %.3fs'
                 ' %d bytes => %d bytes', elapsed, previous_length,
                 self._block._content_length)

547

548

def _prepare_for_extract(self):

549

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

550

# We expect that if one child is going to fulltext, all will be. This

551

# helps prevent all of them from extracting a small amount at a time.

552

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

553

# time (self._block._content) is a little expensive.

554

self._block._ensure_content(self._last_byte)

555

556

def _check_rebuild_action(self):

557

"""Check to see if our block should be repacked."""

558

total_bytes_used = 0

559

last_byte_used = 0

560

for factory in self._factories:

561

total_bytes_used += factory._end - factory._start

562

if last_byte_used < factory._end:

563

last_byte_used = factory._end

564

# If we are using more than half of the bytes from the block, we have

565

# nothing else to check

566

if total_bytes_used * 2 >= self._block._content_length:

567

return None, last_byte_used, total_bytes_used

568

# We are using less than 50% of the content. Is the content we are

569

# using at the beginning of the block? If so, we can just trim the

570

# tail, rather than rebuilding from scratch.

571

if total_bytes_used * 2 > last_byte_used:

572

return 'trim', last_byte_used, total_bytes_used

573

574

# We are using a small amount of the data, and it isn't just packed

575

# nicely at the front, so rebuild the content.

576

# Note: This would be *nicer* as a strip-data-from-group, rather than

577

# building it up again from scratch

578

# It might be reasonable to consider the fulltext sizes for

579

# different bits when deciding this, too. As you may have a small

580

# fulltext, and a trivial delta, and you are just trading around

581

# for another fulltext. If we do a simple 'prune' you may end up

582

# expanding many deltas into fulltexts, as well.

583

# If we build a cheap enough 'strip', then we could try a strip,

584

# if that expands the content, we then rebuild.

585

return 'rebuild', last_byte_used, total_bytes_used

586

587

def check_is_well_utilized(self):
    """Report whether the current block counts as 'well utilized'.

    The heuristic asks whether the block looks like a fully developed
    group rather than a loose collection of data.

    :return: True if the block is considered full enough, False if it
        would benefit from being repacked or combined with others.
    """
    entries = self._factories
    if len(entries) == 1:
        # A block of length 1 could be improved by combining with other
        # groups - don't look deeper. Even larger than max size groups
        # could compress well with adjacent versions of the same thing.
        return False
    _, _, bytes_in_use = self._check_rebuild_action()
    size = self._block._content_length
    if bytes_in_use < size * self._max_cut_fraction:
        # This block wants to trim itself small enough that we want to
        # consider it under-utilized.
        return False
    # TODO: This code is meant to be the twin of _insert_record_stream's
    #       'start_new_block' logic. It would probably be better to factor
    #       out that logic into a shared location, so that it stays
    #       together better
    # We currently assume a block is properly utilized whenever it is >75%
    # of the size of a 'full' block. In normal operation, a block is
    # considered full when it hits 4MB of same-file content. So any block
    # >3MB is 'full enough'.
    # The only time this isn't true is when a given block has large-object
    # content. (a single file >4MB, etc.)
    # Under these circumstances, we allow a block to grow to
    # 2 x largest_content.  Which means that if a given block had a large
    # object, it may actually be under-utilized. However, given that this
    # is 'pack-on-the-fly' it is probably reasonable to not repack large
    # content blobs on-the-fly. Note that because we return False for all
    # 1-item blobs, we will repack them; we may wish to reevaluate our
    # treatment of large object blobs in the future.
    if size >= self._full_enough_block_size:
        return True
    # If a block is <3MB, it still may be considered 'full' if it contains
    # mixed content. The current rule is 2MB of mixed content is
    # considered full. So look for a second key prefix and, if there is
    # one, judge the block against the mixed-content threshold.
    first_prefix = None
    for entry in entries:
        this_prefix = entry.key[:-1]
        if first_prefix is None:
            first_prefix = this_prefix
            continue
        if this_prefix != first_prefix:
            # Mixed content, check the size appropriately
            return size >= self._full_enough_mixed_block_size
    # The content failed both the mixed check and the single-content check
    # so obviously it is not fully utilized
    # TODO: there is one other constraint that isn't being checked
    #       namely, that the entries in the block are in the appropriate
    #       order. For example, you could insert the entries in exactly
    #       reverse groupcompress order, and we would think that is ok.
    #       (all the right objects are in one group, and it is fully
    #       utilized, etc.) For now, we assume that case is rare,
    #       especially since we should always fetch in 'groupcompress'
    #       order.
    return False

648

649

def _check_rebuild_block(self):
    """Trim or rebuild the block if _check_rebuild_action asks for it.

    :raises ValueError: if _check_rebuild_action returns an action other
        than None, 'trim' or 'rebuild'.
    """
    verdict, tail, _ = self._check_rebuild_action()
    if verdict is None:
        # The block is used well enough; leave it untouched.
        return
    if verdict == 'rebuild':
        self._rebuild_block()
        return
    if verdict == 'trim':
        self._trim_block(tail)
        return
    raise ValueError('unknown rebuild action: %r' % (verdict,))

659

660

def _wire_bytes(self):
    """Return a byte stream suitable for transmitting over the wire.

    The block is first given the chance to trim or rebuild itself (see
    _check_rebuild_block). The output is then, in order:

    * the line 'groupcompress-block'
    * one line with the length of the zlib-compressed header
    * one line with the length of the uncompressed header
    * one line with the length of the serialised gc block
    * the compressed header bytes
    * the gc block bytes

    The header carries four lines per factory: the key (elements joined
    by NUL), the parents (parent keys joined by TAB; 'None:' when parents
    is None and an empty line for ()), the start offset and the end byte.
    The length and type are encoded in the record itself, but passing the
    other bits along makes the receiving side easier.
    """
    self._check_rebuild_block()
    record_headers = []
    for entry in self._factories:
        key_field = '\x00'.join(entry.key)
        entry_parents = entry.parents
        if entry_parents is None:
            parents_field = 'None:'
        else:
            parents_field = '\t'.join(
                ['\x00'.join(parent) for parent in entry_parents])
        record_headers.append('%s\n%s\n%d\n%d\n' % (
            key_field, parents_field, entry._start, entry._end))
        # TODO: Can we break the refcycle at this point and set
        #       factory._manager = None?
    header = ''.join(record_headers)
    # Drop intermediate copies as soon as they are no longer needed.
    del record_headers
    header_len = len(header)
    z_header = zlib.compress(header)
    del header
    z_header_len = len(z_header)
    block_bytes = self._block.to_bytes()
    sizes = '%d\n%d\n%d\n' % (z_header_len, header_len, len(block_bytes))
    pieces = ['groupcompress-block\n', sizes, z_header, block_bytes]
    del z_header, block_bytes
    return ''.join(pieces)

705

706

@classmethod
def from_bytes(cls, bytes):
    """Build a manager from the serialised form produced by _wire_bytes.

    :param bytes: The complete wire representation: storage kind line,
        three length lines, compressed header and gc block.
    :return: An instance of cls wrapping the decoded block, with one
        factory added per four-line header record.
    :raises ValueError: if the storage kind is not 'groupcompress-block',
        if any of the declared lengths disagrees with the data, or if
        the header is not made of whole four-line records.
    """
    # TODO: This does extra string copying, probably better to do it a
    #       different way
    (kind, z_len_text, len_text,
     block_len_text, payload) = bytes.split('\n', 4)
    del bytes
    if kind != 'groupcompress-block':
        raise ValueError('Unknown storage kind: %s' % (kind,))
    z_len = int(z_len_text)
    if len(payload) < z_len:
        raise ValueError('Compressed header len shorter than all bytes')
    z_header = payload[:z_len]
    expected_header_len = int(len_text)
    header = zlib.decompress(z_header)
    if len(header) != expected_header_len:
        raise ValueError('invalid length for decompressed bytes')
    del z_header
    block_len = int(block_len_text)
    if len(payload) != z_len + block_len:
        raise ValueError('Invalid length for block')
    block_bytes = payload[z_len:]
    del payload
    # So now we have a valid GCB, we just need to parse the factories that
    # were sent to us
    fields = header.split('\n')
    del header
    # A well formed header ends in a newline, so the final split item is
    # the empty string.
    if fields.pop() != '':
        raise ValueError('header lines did not end with a trailing'
                         ' newline')
    if len(fields) % 4 != 0:
        raise ValueError('The header was not an even multiple of 4 lines')
    block = GroupCompressBlock.from_bytes(block_bytes)
    del block_bytes
    result = cls(block)
    for base in xrange(0, len(fields), 4):
        key_line, parents_line, start_line, end_line = fields[base:base + 4]
        # intern()?
        key = tuple(key_line.split('\x00'))
        if parents_line == 'None:':
            parents = None
        else:
            # An empty line yields no segments and therefore ().
            parents = tuple([tuple(part.split('\x00'))
                             for part in parents_line.split('\t')
                             if part])
        start_offset = int(start_line)
        end_offset = int(end_line)
        result.add_factory(key, parents, start_offset, end_offset)
    return result

756

757

758

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a record stream.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The wire form accepted by
        _LazyGroupContentManager.from_bytes.
    :param line_end: Not used by this function.
    :return: The record stream of the decoded manager.
    :raises ValueError: if storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

763

764

765

class _CommonGroupCompressor(object):
    """Bookkeeping shared by the group compressor implementations.

    Subclasses must implement _compress() and set self._delta_index.

    :ivar chunks: list of the strings output to the group so far.
    :ivar endpoint: offset just past the last byte output so far.
    :ivar input_bytes: running total of the sizes of the texts compressed.
    :ivar labels_deltas: dict mapping key to
        (start_byte, start_chunk, end_byte, end_chunk).
    :ivar _last: (chunk index, endpoint) as they were before the most
        recent compression, used by pop_last(); None when unavailable.
    """

    def __init__(self):
        """Create a GroupCompressor."""
        self.chunks = []
        self._last = None
        self.endpoint = 0
        self.input_bytes = 0
        self.labels_deltas = {}
        self._delta_index = None  # Set by the children
        self._block = GroupCompressBlock()

    def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output
            for identification of the text during decompression. If the
            last element is None it is replaced with the sha1 of the
            text - e.g. sha1:xxxxxxx.
        :param bytes: The bytes to be compressed
        :param expected_sha: If non-None, the sha the text is believed to
            have. It is trusted and used as-is instead of being computed
            from bytes.
        :param nostore_sha: If the sha1 of the text matches, we will raise
            ExistingContent rather than adding the text.
        :param soft: Do a 'soft' compression. This means that we require
            larger ranges to match to be considered for a copy command.

        :return: The sha1 of the text, the start and end offsets in the
            delta, and the type ('fulltext' or 'delta'). Empty input is
            not stored at all and yields (_null_sha1, 0, 0, 'fulltext').

        :seealso VersionedFiles.add_lines:
        """
        if not bytes:  # empty, like a dir entry, etc
            if nostore_sha == _null_sha1:
                raise errors.ExistingContent()
            return _null_sha1, 0, 0, 'fulltext'
        # we assume someone knew what they were doing when they passed it in
        if expected_sha is not None:
            sha1 = expected_sha
        else:
            sha1 = osutils.sha_string(bytes)
        if nostore_sha is not None and sha1 == nostore_sha:
            raise errors.ExistingContent()
        if key[-1] is None:
            key = key[:-1] + ('sha1:' + sha1,)

        # A delta is only worth storing when it is at most half the size
        # of the text. Floor division keeps the limit an integer even
        # when '/' means true division.
        start, end, kind = self._compress(key, bytes, len(bytes) // 2, soft)
        return sha1, start, end, kind

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output for
            identification of the text during decompression.

        :param bytes: The bytes to be compressed

        :param max_delta_size: The size above which we issue a fulltext
            instead of a delta.

        :param soft: Do a 'soft' compression. This means that we require
            larger ranges to match to be considered for a copy command.

        :return: The start and end offsets in the delta, and the type
            ('fulltext' or 'delta').
        """
        raise NotImplementedError(self._compress)

    def extract(self, key):
        """Extract a key previously added to the compressor.

        :param key: The key to extract.
        :return: A tuple (text, sha1 of text).
        :raises KeyError: if key is not in labels_deltas.
        :raises ValueError: if the stored record is neither a fulltext
            ('f') nor a delta ('d'), or if its encoded length disagrees
            with the number of bytes stored for it.
        """
        (start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]
        stored_bytes = ''.join(self.chunks[start_chunk:end_chunk])
        kind = stored_bytes[0]
        if kind == 'f':
            fulltext_len, offset = decode_base128_int(stored_bytes[1:10])
            data_len = fulltext_len + 1 + offset
            if data_len != len(stored_bytes):
                raise ValueError('Index claimed fulltext len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored_bytes), data_len))
            text = stored_bytes[offset + 1:]
        else:
            if kind != 'd':
                raise ValueError('Unknown content kind, bytes claim %s'
                                 % (kind,))
            # XXX: This is inefficient at best
            # The delta is applied against everything output before it.
            source = ''.join(self.chunks[:start_chunk])
            delta_len, offset = decode_base128_int(stored_bytes[1:10])
            data_len = delta_len + 1 + offset
            if data_len != len(stored_bytes):
                raise ValueError('Index claimed delta len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored_bytes), data_len))
            text = apply_delta(source, stored_bytes[offset + 1:])
        text_sha1 = osutils.sha_string(text)
        return text, text_sha1

    def flush(self):
        """Finish this group, creating a formatted stream.

        After calling this, the compressor should no longer be used

        :return: The block, now holding the chunks output so far.
        """
        # TODO: this causes us to 'bloat' to 2x the size of content in the
        #       group. This has an impact for 'commit' of large objects.
        #       One possibility is to use self._content_chunks, and be lazy
        #       and only fill out self._content as a full string when we
        #       actually need it. That would at least drop the peak memory
        #       consumption for 'commit' down to ~1x the size of the
        #       largest file, at a cost of increased complexity within this
        #       code. 2x is still << 3x the size of the largest file, so we
        #       are doing ok.
        self._block.set_chunked_content(self.chunks, self.endpoint)
        self.chunks = None
        self._delta_index = None
        return self._block

    def pop_last(self):
        """Call this if you want to 'revoke' the last compression.

        After this, the data structures will be rolled back, but you cannot
        do more compression.
        """
        self._delta_index = None
        del self.chunks[self._last[0]:]
        self.endpoint = self._last[1]
        self._last = None

    def ratio(self):
        """Return the overall compression ratio.

        This is input_bytes divided by endpoint, so it raises
        ZeroDivisionError while nothing has been output yet.
        """
        return float(self.input_bytes) / float(self.endpoint)

900

901

902

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor that delegates matching to a LinesDeltaIndex."""

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        line_index = LinesDeltaIndex([])
        self._delta_index = line_index
        # The actual content is managed by LinesDeltaIndex; our chunks
        # attribute is simply its list of lines.
        self.chunks = line_index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        size = len(bytes)
        text_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            text_lines, bytes_length=size, soft=soft)
        delta_size = sum([len(piece) for piece in out_lines])
        if delta_size <= max_delta_size:
            # this is a worthy delta, output it: slot 0 takes the 'd'
            # marker and slot 1 the encoded delta length.
            kind = 'delta'
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_size)
        else:
            # The delta exceeds the allowed size, insert a fulltext. The
            # two header entries are not indexed, the text lines are.
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(size)] + list(text_lines)
            index_lines = [False, False] + [True] * len(text_lines)
        # Remember where we were before insertion so pop_last can undo it.
        first_byte = self.endpoint
        first_chunk = len(self.chunks)
        self._last = (first_chunk, first_byte)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += size
        self.labels_deltas[key] = (first_byte, first_chunk,
                                   self.endpoint, len(self.chunks))
        return first_byte, self.endpoint, kind

945

946

947

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a
    similar task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a
       good range will start, and after the first text we want to be
       emitting minimal edits only).
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are
       provided.
     - we look for matches in all of the left side, so the routine which
       does the analogous task of find_longest_match does not need to
       filter on the left side.
    """

    def __init__(self):
        """Create a compressor backed by a fresh DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        soft is accepted for interface compatibility; this implementation
        does not use it.

        :raises AssertionError: if the delta index's _source_offset does
            not match self.endpoint before or after the insertion.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the
        # index was ever destroyed, we have the key in 'label', we know the
        # final bytes are valid from sha1, and we know where to find the
        # end of this record because of 'len'. (the delta record itself
        # will store the total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase
        # for inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No acceptable delta was produced: store the text itself.
            kind = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            kind = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # Note the leading space on the second literal: adjacent
            # string literals are concatenated verbatim.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, kind

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion position in self._last, appends the
        chunks and advances self.endpoint by their combined length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

1021

1022

1023

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try
    to provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Passed through to the _GCGraphIndex that
        the factory creates.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles writing to 'newpack' on it. The result
        carries the open stream and container writer as its .stream and
        .writer attributes.
    """
    def factory(transport):
        # One reference list (the parents) when a graph is stored.
        if graph:
            ref_lists = 1
        else:
            ref_lists = 0
        builder = BTreeBuilder(reference_lists=ref_lists,
                               key_elements=keylength)
        out_stream = transport.open_write_stream('newpack')
        container = pack.ContainerWriter(out_stream.write)
        container.begin()
        gc_index = _GCGraphIndex(
            builder, lambda: True, parents=graph,
            add_callback=builder.add_nodes,
            inconsistency_fatal=inconsistency_fatal)
        pack_access = knit._DirectPackAccess({})
        pack_access.set_writer(container, builder, (transport, 'newpack'))
        vf = GroupCompressVersionedFiles(gc_index, pack_access, delta)
        vf.stream = out_stream
        vf.writer = container
        return vf
    return factory

1053

1054

1055

def cleanup_pack_group(versioned_files):
    """End the container writer and close the stream of versioned_files.

    :param versioned_files: An object with .writer and .stream attributes,
        such as the result of a make_pack_factory() factory.
    """
    container = versioned_files.writer
    container.end()
    out_stream = versioned_files.stream
    out_stream.close()

1058

1059

1060

class _BatchingBlockFetcher(object):
    """Fetch group compress blocks in batches.

    :ivar total_bytes: int of expected number of bytes needed to fetch the
        currently pending batch.
    :ivar keys: keys added since the last yield_factories call, in order.
    :ivar batch_memos: dict mapping each read_memo of the batch to its
        block; the value is None until the block has been fetched.
    :ivar memos_to_get: read_memos of the batch that were not cached when
        they were added, in the order they were first seen.
    """

    def __init__(self, gcvf, locations):
        """Create a fetcher.

        :param gcvf: The object providing _group_cache and _get_blocks.
        :param locations: A mapping from key to a 4-tuple whose first
            item is the index memo and whose third item is the parents.
        """
        self.gcvf = gcvf
        self.locations = locations
        self.keys = []
        self.batch_memos = {}
        self.memos_to_get = []
        self.total_bytes = 0
        self.last_read_memo = None
        self.manager = None

    def add_key(self, key):
        """Add another key to fetch.

        :return: The estimated number of bytes needed to fetch the batch so
            far.
        """
        self.keys.append(key)
        index_memo, _, _, _ = self.locations[key]
        read_memo = index_memo[0:3]
        # Three possibilities for this read_memo:
        #  - it's already part of this batch; or
        #  - it's not yet part of this batch, but is already cached; or
        #  - it's not yet part of this batch and will need to be fetched.
        if read_memo in self.batch_memos:
            # This read memo is already in this batch.
            return self.total_bytes
        try:
            cached_block = self.gcvf._group_cache[read_memo]
        except KeyError:
            # This read memo is new to this batch, and the data isn't cached
            # either.
            self.batch_memos[read_memo] = None
            self.memos_to_get.append(read_memo)
            byte_length = read_memo[2]
            self.total_bytes += byte_length
        else:
            # This read memo is new to this batch, but cached.
            # Keep a reference to the cached block in batch_memos because
            # it's certain that we'll use it when this batch is processed,
            # but there's a risk that it would fall out of _group_cache
            # between now and then.
            self.batch_memos[read_memo] = cached_block
        return self.total_bytes

    def _flush_manager(self):
        """Yield the records of the current manager, then forget it."""
        if self.manager is not None:
            for factory in self.manager.get_record_stream():
                yield factory
            self.manager = None
            self.last_read_memo = None

    def yield_factories(self, full_flush=False):
        """Yield factories for keys added since the last yield.  They will be
        returned in the order they were added via add_key.

        :param full_flush: by default, some results may not be returned in
            case they can be part of the next batch.  If full_flush is
            True, then all results are returned.
        :raises AssertionError: if the blocks come back from _get_blocks in
            a different order than they were requested.
        """
        if self.manager is None and not self.keys:
            return
        # Fetch all memos in this batch.
        blocks = self.gcvf._get_blocks(self.memos_to_get)
        # Turn blocks into factories and yield them.
        memos_to_get_stack = list(self.memos_to_get)
        memos_to_get_stack.reverse()
        for key in self.keys:
            index_memo, _, parents, _ = self.locations[key]
            read_memo = index_memo[:3]
            if self.last_read_memo != read_memo:
                # We are starting a new block. If we have a
                # manager, we have found everything that fits for
                # now, so yield records
                for factory in self._flush_manager():
                    yield factory
                # Now start a new manager.
                if memos_to_get_stack and memos_to_get_stack[-1] == read_memo:
                    # The next block from _get_blocks will be the block we
                    # need.
                    block_read_memo, block = blocks.next()
                    if block_read_memo != read_memo:
                        # The leading space on the second literal keeps
                        # the two parts of the message apart.
                        raise AssertionError(
                            "block_read_memo out of sync with read_memo"
                            " (%r != %r)" % (block_read_memo, read_memo))
                    self.batch_memos[read_memo] = block
                    memos_to_get_stack.pop()
                else:
                    block = self.batch_memos[read_memo]
                self.manager = _LazyGroupContentManager(block)
                self.last_read_memo = read_memo
            start, end = index_memo[3:5]
            self.manager.add_factory(key, parents, start, end)
        if full_flush:
            for factory in self._flush_manager():
                yield factory
        # Reset the batch state in place.
        del self.keys[:]
        self.batch_memos.clear()
        del self.memos_to_get[:]
        self.total_bytes = 0

1166

1167

1168

class GroupCompressVersionedFiles(VersionedFiles):

1169

"""A group-compress based VersionedFiles implementation."""

1170

1171

def __init__(self, index, access, delta=True, _unadded_refs=None):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    :param _unadded_refs: private parameter, don't use.
    """
    if _unadded_refs is None:
        # Every instance gets its own dict unless one is handed in.
        _unadded_refs = {}
    self._index = index
    self._access = access
    self._delta = delta
    self._unadded_refs = _unadded_refs
    # Cache of blocks, capped at 50MB.
    cache_limit = 50 * 1024 * 1024
    self._group_cache = LRUSizeCache(max_size=cache_limit)
    self._fallback_vfs = []

1187

1188

def without_fallbacks(self):
    """Return a clone of this object without any fallbacks configured.

    The clone shares this object's index, access and delta setting, and
    receives its own copy of the _unadded_refs dict.
    """
    refs_copy = dict(self._unadded_refs)
    clone = GroupCompressVersionedFiles(
        self._index, self._access, self._delta, _unadded_refs=refs_copy)
    return clone

1192

1193

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add. None is
        treated as ().
    :param lines: A list of lines. Each line must be a bytestring. All
        of them except the last must end with a newline and contain no
        other newlines. The last line may either contain no newline or
        a single terminating one. If the list does not meet this
        constraint the add routine may error or may succeed - but you
        will be unable to read the data back accurately. (Checking the
        lines have been split correctly is expensive and extremely
        unlikely to catch bugs so it is not done at runtime unless
        check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. VERY IMPORTANT: the texts must be
        those returned by add_lines or data corruption can be caused.
        This implementation does not read it.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent. The format is
        the SequenceMatcher.get_matching_blocks format. This
        implementation does not read it.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a
        converter from a foreign VCS. When True the backend may choose
        not to check for uniqueness of the resulting key within the
        versioned file, so this should only be done when the result is
        expected to be unique anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and an
        opaque representation of the inserted version which can be
        provided back to future add_lines calls in the parent_texts
        dictionary. The opaque representation is always None here.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    total_len = sum([len(line) for line in lines])
    record = ChunkedContentFactory(key, parents, None, lines)
    inserted = self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha)
    # Only one record went in, so the first sha1 yielded is ours.
    text_sha1 = list(inserted)[0]
    return text_sha1, total_len, None

1242

1243

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :return: The text sha1, the length of text, and None.
    :raises BzrBadParameterUnicode: if text is not exactly a str.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_len = len(text)
    record = FulltextContentFactory(key, parents, None, text)
    inserted = self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha)
    # Only one record went in, so the first sha1 yielded is ours.
    text_sha1 = list(inserted)[0]
    return text_sha1, text_len, None

1260

1261

def add_fallback_versioned_files(self, a_versioned_files):
    """Register a further source for texts that are not present here.

    Fallbacks are kept in the order they were added.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1267

1268

def annotate(self, key):
    """See VersionedFiles.annotate.

    Delegates to annotate_flat() of a new Annotator over this object.
    """
    return annotate.Annotator(self).annotate_flat(key)

1272

1273

def get_annotator(self):
    """Return a new annotate.Annotator working on this object."""
    annotator = annotate.Annotator(self)
    return annotator

1275

1276

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    :param progress_bar: Not used by this implementation.
    :param keys: When None, every key of this object is extracted to a
        fulltext and nothing is returned. Otherwise nothing is extracted
        here and the unordered record stream for keys is returned.
    """
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)
    all_keys = self.keys()
    for entry in self.get_record_stream(all_keys, 'unordered', True):
        # Extracting the text is the check; the result is discarded.
        entry.get_bytes_as('fulltext')

1284

1285

def _check_add(self, key, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param key: The key tuple; its last element is the version id.
    :param lines: The lines to validate when check_content is True.
    :param random_id: Not used by this check.
    :param check_content: If True, verify that lines are not unicode and
        are correctly split lines.
    :raises InvalidRevisionId: if the version id contains whitespace.
    """
    version_id = key[-1]
    has_id = version_id is not None
    if has_id and osutils.contains_whitespace(version_id):
        raise errors.InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    #       probably check that the existing content is identical to what
    #       is being inserted, and otherwise raise an exception.  This
    #       would make the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1299

1300

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    The ancestry is looked up in our own index first; keys it reports as
    missing are then looked up in each fallback's index in turn, until
    nothing is missing or the fallbacks run out.
    """
    # Note that this is identical to
    # KnitVersionedFiles.get_known_graph_ancestry, but they don't share
    # ancestry.
    ancestry, still_missing = self._index.find_ancestry(keys)
    for fallback_vf in self._fallback_vfs:
        if not still_missing:
            break
        found = fallback_vf._index.find_ancestry(still_missing)
        (extra_ancestry, still_missing) = found
        ancestry.update(extra_ancestry)
    return _mod_graph.KnownGraph(ancestry)

1315

1316

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    with_sources = self._get_parent_map_with_sources(keys)
    # Only the combined mapping is wanted, not the per-source results.
    return with_sources[0]

1324

1325

def _get_parent_map_with_sources(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A tuple. The first element is a mapping from keys to parents.
        Absent keys are absent from the mapping. The second element is a
        list with the results each consulted source returned. The first
        entry comes from our own index, the second from the first
        fallback source, and so on. Sources that were not consulted,
        because nothing was missing any more, have no entry.
    """
    combined = {}
    per_source = []
    outstanding = set(keys)
    for source in [self._index] + self._fallback_vfs:
        if not outstanding:
            break
        found = source.get_parent_map(outstanding)
        per_source.append(found)
        combined.update(found)
        # Later sources are only asked about what is still unknown.
        outstanding -= set(found)
    return combined, per_source

1347

1348

def _get_blocks(self, read_memos):

1349

"""Get GroupCompressBlocks for the given read_memos.

1350

1351

:returns: a series of (read_memo, block) pairs, in the order they were

1352

originally passed.

1353

"""

1354

cached = {}

1355

for read_memo in read_memos:

1356

try:

1357

block = self._group_cache[read_memo]

1358

except KeyError:

1359

pass

1360

else:

1361

cached[read_memo] = block

1362

not_cached = []

1363

not_cached_seen = set()

1364

for read_memo in read_memos:

1365

if read_memo in cached:

1366

# Don't fetch what we already have

1367

continue

1368

if read_memo in not_cached_seen:

1369

# Don't try to fetch the same data twice

1370

continue

1371

not_cached.append(read_memo)

1372

not_cached_seen.add(read_memo)

1373

raw_records = self._access.get_raw_records(not_cached)

1374

for read_memo in read_memos:

1375

try:

1376

yield read_memo, cached[read_memo]

1377

except KeyError:

1378

# Read the block, and cache it.

1379

zdata = raw_records.next()

1380

block = GroupCompressBlock.from_bytes(zdata)

1381

self._group_cache[read_memo] = block

1382

cached[read_memo] = block

1383

yield read_memo, block

1384

1385

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: Always an empty frozenset for this implementation.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so there is never anything to report.
    no_missing_parents = frozenset()
    return no_missing_parents

1394

1395

def get_record_stream(self, keys, ordering, include_delta_closure):

1396

"""Get a stream of records for keys.

1397

1398

:param keys: The keys to include.

1399

:param ordering: Either 'unordered' or 'topological'. A topologically

1400

sorted stream has compression parents strictly before their

1401

children.

1402

:param include_delta_closure: If True then the closure across any

1403

compression parents will be included (in the opaque data).

1404

:return: An iterator of ContentFactory objects, each of which is only

1405

valid until the iterator is advanced.

1406

"""

1407

# keys might be a generator

1408

orig_keys = list(keys)

1409

keys = set(keys)

1410

if not keys:

1411

return

1412

if (not self._index.has_graph

1413

and ordering in ('topological', 'groupcompress')):

1414

# Cannot topological order when no graph has been stored.

1415

# but we allow 'as-requested' or 'unordered'

1416

ordering = 'unordered'

1417

1418

remaining_keys = keys

1419

while True:

1420

try:

1421

keys = set(remaining_keys)

1422

for content_factory in self._get_remaining_record_stream(keys,

1423

orig_keys, ordering, include_delta_closure):

1424

remaining_keys.discard(content_factory.key)

1425

yield content_factory

1426

return

1427

except errors.RetryWithNewPacks, e:

1428

self._access.reload_or_raise(e)

1429

1430

def _find_from_fallback(self, missing):

1431

"""Find whatever keys you can from the fallbacks.

1432

1433

:param missing: A set of missing keys. This set will be mutated as keys

1434

are found from a fallback_vfs

1435

:return: (parent_map, key_to_source_map, source_results)

1436

parent_map the overall key => parent_keys

1437

key_to_source_map a dict from {key: source}

1438

source_results a list of (source: keys)

1439

"""

1440

parent_map = {}

1441

key_to_source_map = {}

1442

source_results = []

1443

for source in self._fallback_vfs:

1444

if not missing:

1445

break

1446

source_parents = source.get_parent_map(missing)

1447

parent_map.update(source_parents)

1448

source_parents = list(source_parents)

1449

source_results.append((source, source_parents))

1450

key_to_source_map.update((key, source) for key in source_parents)

1451

missing.difference_update(source_parents)

1452

return parent_map, key_to_source_map, source_results

1453

1454

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The keys are first put in the order defined by 'ordering' and then cut
    into runs of consecutive keys that come from the same source, so the
    result may weave between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: key => parent_keys for every key to be ordered.
    :param key_to_source_map: key => source for keys held by a fallback;
        keys not listed are attributed to ``self``.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering == 'topological':
        ordered_keys = topo_sort(parent_map)
    else:
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        ordered_keys = sort_gc_optimal(parent_map)
    # Now group by source: a new run starts whenever the source changes.
    runs = []
    previous_source = None
    for key in ordered_keys:
        owner = key_to_source_map.get(key, self)
        if owner is previous_source:
            runs[-1][1].append(key)
        else:
            runs.append((owner, [key]))
            previous_source = owner
    return runs

1482

1483

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1484

key_to_source_map):

1485

source_keys = []

1486

current_source = None

1487

for key in orig_keys:

1488

if key in locations or key in unadded_keys:

1489

source = self

1490

elif key in key_to_source_map:

1491

source = key_to_source_map[key]

1492

else: # absent

1493

continue

1494

if source is not current_source:

1495

source_keys.append((source, []))

1496

current_source = source

1497

source_keys[-1][1].append(key)

1498

return source_keys

1499

1500

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1501

source_result):

1502

def get_group(key):

1503

# This is the group the bytes are stored in, followed by the

1504

# location in the group

1505

return locations[key][0]

1506

present_keys = sorted(locations.iterkeys(), key=get_group)

1507

# We don't have an ordering for keys in the in-memory object, but

1508

# lets process the in-memory ones first.

1509

present_keys = list(unadded_keys) + present_keys

1510

# Now grab all of the ones from other sources

1511

source_keys = [(self, present_keys)]

1512

source_keys.extend(source_result)

1513

return source_keys

1514

1515

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    Keys are resolved against this object's index, its not-yet-added refs
    and finally the fallbacks.  An AbsentContentFactory is yielded for each
    key found nowhere, before any real content.

    :param keys: The set of keys to include.
    :param orig_keys: The keys in the order originally requested; only
        used for 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    # _find_from_fallback removes from 'missing' whatever it locates.
    fallback_info = self._find_from_fallback(missing)
    fallback_parent_map, key_to_source_map, source_result = fallback_info
    if ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(
            orig_keys, locations, unadded_keys, key_to_source_map)
    elif ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = {}
        for key, details in locations.iteritems():
            parent_map[key] = details[2]
        for key in unadded_keys:
            parent_map[key] = self._unadded_refs[key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(
            ordering, parent_map, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(
            locations, unadded_keys, source_result)
    for absent_key in missing:
        yield AbsentContentFactory(absent_key)
    # Batch up as many keys as we can until either:
    #  - we encounter an unadded ref, or
    #  - we run out of keys, or
    #  - the total bytes to retrieve for this batch > BATCH_SIZE
    batcher = _BatchingBlockFetcher(self, locations)
    for source, run_keys in source_keys:
        if source is not self:
            # Drain what is pending locally before handing over to the
            # fallback, so the overall order is preserved.
            for factory in batcher.yield_factories(full_flush=True):
                yield factory
            for record in source.get_record_stream(run_keys, ordering,
                                                   include_delta_closure):
                yield record
            continue
        for key in run_keys:
            if key not in self._unadded_refs:
                if batcher.add_key(key) > BATCH_SIZE:
                    # Ok, this batch is big enough.  Yield some results.
                    for factory in batcher.yield_factories():
                        yield factory
                continue
            # Flush batch, then yield unadded ref from
            # self._compressor.
            for factory in batcher.yield_factories(full_flush=True):
                yield factory
            text, sha1 = self._compressor.extract(key)
            parents = self._unadded_refs[key]
            yield FulltextContentFactory(key, parents, sha1, text)
    for factory in batcher.yield_factories(full_flush=True):
        yield factory

1584

1585

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping key to sha1.  A record that carries a sha1
        contributes it directly; a record without one has it computed from
        its fulltext, unless the record is 'absent', in which case the key
        is left out of the result.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1596

1597

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    The work is done by the _insert_record_stream generator; this wrapper
    just runs it to completion and discards the sha1s it yields.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserter = self._insert_record_stream(stream, random_id=False)
    for _sha1 in inserter:
        pass

1610

1611

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated, and the
    last group is only flushed once the stream has been fully consumed.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to self._index.add_records.  When
        true, a key that shows up twice in the stream is noted and skipped.
    :param nostore_sha: Passed to the compressor.  If the sha1 of a given
        text matches nostore_sha, raise ExistingContent, rather than
        committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records.  Records
        copied via block reuse do not produce a sha1.
    :raises errors.RevisionNotPresent: if the stream holds an 'absent'
        record.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    adapters = {}
    def get_adapter(adapter_key):
        # Adapters are created on first use and then shared.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    keys_to_add = []
    def flush():
        # Write the current group out, index everything that was queued
        # for it, then start over with an empty compressor.
        group_bytes = self._compressor.flush().to_bytes()
        _, start, length = self._access.add_raw_records(
            [(None, len(group_bytes))], group_bytes)[0]
        nodes = [(key, "%d %d %s" % (start, length, reads), refs)
                 for key, reads, refs in keys_to_add]
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    reuse_this_block = reuse_blocks
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            # We only check on the first record (groupcompress-block) not
            # on all of the (groupcompress-block-ref) entries.
            # The reuse_this_block flag is then kept unchanged until the
            # next groupcompress-block record re-evaluates it.
            if record.storage_kind == 'groupcompress-block':
                # Check to see if we really want to re-use this block
                insert_manager = record._manager
                reuse_this_block = insert_manager.check_is_well_utilized()
        else:
            reuse_this_block = False
        if reuse_this_block:
            # We still want to reuse this block
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                block_bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(block_bytes))], block_bytes)[0]
                # Drop the reference straight away rather than holding
                # the serialised block for the rest of the loop body.
                del block_bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                if insert_manager is not record._manager:
                    raise AssertionError('insert_manager does not match'
                        ' the current record, we cannot be positive'
                        ' that the appropriate content was inserted.'
                        )
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            text = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            text = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(text):
            max_fulltext_len = len(text)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         kind) = self._compressor.compress(record.key,
                                           text, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(text)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the full group, write that group,
            # and compress the text again as the first entry of a new one.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(text)
            (found_sha1, start_point, end_point,
             kind) = self._compressor.compress(record.key, text,
                                               record.sha1)
        if record.key[-1] is None:
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
            (record.parents,)))
    if keys_to_add:
        flush()
    self._compressor = None

1765

1766

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have
       newline terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose texts should be walked.
    :param pb: Optional progress bar; updated once per record and once
        more when the walk is complete.
    :return: An iterator over (line, key).
    :raises errors.RevisionNotPresent: if any key is absent.
    """
    keys = set(keys)
    total = len(keys)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    stream = self.get_record_stream(keys, 'unordered', True)
    for key_idx, record in enumerate(stream):
        # XXX: todo - optimise to use less than full texts.
        key = record.key
        if pb is not None:
            pb.update('Walking content', key_idx, total)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(key, self)
        fulltext = record.get_bytes_as('fulltext')
        for line in osutils.split_lines(fulltext):
            yield line, key
    if pb is not None:
        pb.update('Walking content', total, total)

1805

1806

def keys(self):
    """See VersionedFiles.keys.

    :return: A set with the keys of this object's index and of every
        fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1815

1816

1817

class _GCGraphIndex(object):

1818

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1819

1820

def __init__(self, graph_index, is_locked, parents=True,

1821

add_callback=None, track_external_parent_refs=False,

1822

inconsistency_fatal=True, track_new_keys=False):

1823

"""Construct a _GCGraphIndex on a graph_index.

1824

1825

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1826

:param is_locked: A callback, returns True if the index is locked and

1827

thus usable.

1828

:param parents: If True, record knits parents, if not do not record

1829

parents.

1830

:param add_callback: If not None, allow additions to the index and call

1831

this callback with a list of added GraphIndex nodes:

1832

[(node, value, node_refs), ...]

1833

:param track_external_parent_refs: As keys are added, keep track of the

1834

keys they reference, so that we can query get_missing_parents(),

1835

etc.

1836

:param inconsistency_fatal: When asked to add records that are already

1837

present, and the details are inconsistent with the existing

1838

record, raise an exception instead of warning (and skipping the

1839

record).

1840

"""

1841

self._add_callback = add_callback

1842

self._graph_index = graph_index

1843

self._parents = parents

1844

self.has_graph = parents

1845

self._is_locked = is_locked

1846

self._inconsistency_fatal = inconsistency_fatal

1847

if track_external_parent_refs:

1848

self._key_dependencies = knit._KeyRefs(

1849

track_new_keys=track_new_keys)

1850

else:

1851

self._key_dependencies = None

1852

1853

def add_records(self, records, random_id=False):

1854

"""Add multiple records to the index.

1855

1856

This function does not insert data into the Immutable GraphIndex

1857

backing the KnitGraphIndex, instead it prepares data for insertion by

1858

the caller and checks that it is safe to insert then calls

1859

self._add_callback with the prepared GraphIndex nodes.

1860

1861

:param records: a list of tuples:

1862

(key, options, access_memo, parents).

1863

:param random_id: If True the ids being added were randomly generated

1864

and no check for existence will be performed.

1865

"""

1866

if not self._add_callback:

1867

raise errors.ReadOnlyError(self)

1868

# we hope there are no repositories with inconsistent parentage

1869

# anymore.

1870

1871

changed = False

1872

keys = {}

1873

for (key, value, refs) in records:

1874

if not self._parents:

1875

if refs:

1876

for ref in refs:

1877

if ref:

1878

raise errors.KnitCorrupt(self,

1879

"attempt to add node with parents "

1880

"in parentless index.")

1881

refs = ()

1882

changed = True

1883

keys[key] = (value, refs)

1884

# check for dups

1885

if not random_id:

1886

present_nodes = self._get_entries(keys)

1887

for (index, key, value, node_refs) in present_nodes:

1888

if node_refs != keys[key][1]:

1889

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1890

if self._inconsistency_fatal:

1891

raise errors.KnitCorrupt(self, "inconsistent details"

1892

" in add_records: %s" %

1893

details)

1894

else:

1895

trace.warning("inconsistent details in skipped"

1896

" record: %s", details)

1897

del keys[key]

1898

changed = True

1899

if changed:

1900

result = []

1901

if self._parents:

1902

for key, (value, node_refs) in keys.iteritems():

1903

result.append((key, value, node_refs))

1904

else:

1905

for key, (value, node_refs) in keys.iteritems():

1906

result.append((key, value))

1907

records = result

1908

key_dependencies = self._key_dependencies

1909

if key_dependencies is not None:

1910

if self._parents:

1911

for key, value, refs in records:

1912

parents = refs[0]

1913

key_dependencies.add_references(key, parents)

1914

else:

1915

for key, value, refs in records:

1916

new_keys.add_key(key)

1917

self._add_callback(records)

1918

1919

def _check_read(self):

1920

"""Raise an exception if reads are not permitted."""

1921

if not self._is_locked():

1922

raise errors.ObjectNotLocked(self)

1923

1924

def _check_write_ok(self):

1925

"""Raise an exception if writes are not permitted."""

1926

if not self._is_locked():

1927

raise errors.ObjectNotLocked(self)

1928

1929

def _get_entries(self, keys, check_present=False):

1930

"""Get the entries for keys.

1931

1932

Note: Callers are responsible for checking that the index is locked

1933

before calling this method.

1934

1935

:param keys: An iterable of index key tuples.

1936

"""

1937

keys = set(keys)

1938

found_keys = set()

1939

if self._parents:

1940

for node in self._graph_index.iter_entries(keys):

1941

yield node

1942

found_keys.add(node[1])

1943

else:

1944

# adapt parentless index to the rest of the code.

1945

for node in self._graph_index.iter_entries(keys):

1946

yield node[0], node[1], node[2], ()

1947

found_keys.add(node[1])

1948

if check_present:

1949

missing_keys = keys.difference(found_keys)

1950

if missing_keys:

1951

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1952

1953

def find_ancestry(self, keys):

1954

"""See CombinedGraphIndex.find_ancestry"""

1955

return self._graph_index.find_ancestry(keys, 0)

1956

1957

def get_parent_map(self, keys):

1958

"""Get a map of the parents of keys.

1959

1960

:param keys: The keys to look up parents for.

1961

:return: A mapping from keys to parents. Absent keys are absent from

1962

the mapping.

1963

"""

1964

self._check_read()

1965

nodes = self._get_entries(keys)

1966

result = {}

1967

if self._parents:

1968

for node in nodes:

1969

result[node[1]] = node[3][0]

1970

else:

1971

for node in nodes:

1972

result[node[1]] = None

1973

return result

1974

1975

def get_missing_parents(self):

1976

"""Return the keys of missing parents."""

1977

# Copied from _KnitGraphIndex.get_missing_parents

1978

# We may have false positives, so filter those out.

1979

self._key_dependencies.satisfy_refs_for_keys(

1980

self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))

1981

return frozenset(self._key_dependencies.get_unsatisfied_refs())

1982

1983

def get_build_details(self, keys):

1984

"""Get the various build details for keys.

1985

1986

Ghosts are omitted from the result.

1987

1988

:param keys: An iterable of keys.

1989

:return: A dict of key:

1990

(index_memo, compression_parent, parents, record_details).

1991

index_memo

1992

opaque structure to pass to read_records to extract the raw

1993

data

1994

compression_parent

1995

Content that this record is built upon, may be None

1996

parents

1997

Logical parents of this node

1998

record_details

1999

extra information about the content which needs to be passed to

2000

Factory.parse_record

2001

"""

2002

self._check_read()

2003

result = {}

2004

entries = self._get_entries(keys)

2005

for entry in entries:

2006

key = entry[1]

2007

if not self._parents:

2008

parents = None

2009

else:

2010

parents = entry[3][0]

2011

method = 'group'

2012

result[key] = (self._node_to_position(entry),

2013

None, parents, (method, None))

2014

return result

2015

2016

def keys(self):

2017

"""Get all the keys in the collection.

2018

2019

The keys are not ordered.

2020

"""

2021

self._check_read()

2022

return [node[1] for node in self._graph_index.iter_all_entries()]

2023

2024

def _node_to_position(self, node):

2025

"""Convert an index value to position details."""

2026

bits = node[2].split(' ')

2027

# It would be nice not to read the entire gzip.

2028

start = int(bits[0])

2029

stop = int(bits[1])

2030

basis_end = int(bits[2])

2031

delta_end = int(bits[3])

2032

return node[0], start, stop, basis_end, delta_end

2033

2034

def scan_unvalidated_index(self, graph_index):

2035

"""Inform this _GCGraphIndex that there is an unvalidated index.

2036

2037

This allows this _GCGraphIndex to keep track of any missing

2038

compression parents we may want to have filled in to make those

2039

indices valid. It also allows _GCGraphIndex to track any new keys.

2040

2041

:param graph_index: A GraphIndex

2042

"""

2043

key_dependencies = self._key_dependencies

2044

if key_dependencies is None:

2045

return

2046

for node in graph_index.iter_all_entries():

2047

# Add parent refs from graph_index (and discard parent refs

2048

# that the graph_index has).

2049

key_dependencies.add_references(node[1], node[3][0])

2050

2051

2052

from bzrlib._groupcompress_py import (

2053

apply_delta,

2054

apply_delta_to_source,

2055

encode_base128_int,

2056

decode_base128_int,

2057

decode_copy_instruction,

2058

LinesDeltaIndex,

2059

)

2060

try:

2061

from bzrlib._groupcompress_pyx import (

2062

apply_delta,

2063

apply_delta_to_source,

2064

DeltaIndex,

2065

encode_base128_int,

2066

decode_base128_int,

2067

)

2068

GroupCompressor = PyrexGroupCompressor

2069

except ImportError:

2070

GroupCompressor = PythonGroupCompressor

2071

Older »