~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Canonical.com Patch Queue Manager
Date: 2009-10-06 20:45:48 UTC
mfrom: (4728.1.2 integration)
Revision ID: pqm@pqm.ubuntu.com-20091006204548-bjnc3z4k256ppimz

MutableTree.has_changes() does not require a tree parameter anymore

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/benchmarks/bench_tags.py

bzrlib/bencode.py

bzrlib/crash.py

bzrlib/doc_generate/sphinx_conf.py

bzrlib/filters/eol.py

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/inventory_delta.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/features.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/script.py

bzrlib/tests/test__annotator.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_script.py

bzrlib/tests/test_serializer.py

bzrlib/transport/pathfilter.py

bzrlib/util/bencode.py

doc/Bazaar-Logo-For-Manuals.png

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/apport.txt

doc/developers/bug-handling.txt

doc/developers/check.txt

doc/developers/conf.py

doc/developers/content-filtering.txt

doc/developers/implementation-notes.txt

doc/developers/index-plain.txt

doc/developers/miscellaneous-notes.txt

doc/developers/plans.txt

doc/developers/process.txt

doc/developers/specifications.txt

doc/en/Makefile

doc/en/_static

doc/en/_static/bzr icon 16.png

doc/en/_static/bzr.ico

doc/en/_static/en

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/_static/es/bzr-es-quick-reference.png

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/quick-reference/index.txt

doc/es/user-guide/index-plain.txt

doc/index.es.txt

doc/index.ru.txt

doc/ru

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_static/ru/Makefile

doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/index.txt

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/index.txt

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index-plain.txt

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

tools/generate_release_notes.py

tools/package_docs.py

tools/packaging/update-control.sh

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files removed:
bzrlib/tests/blackbox/test_guess_renames.py

bzrlib/tests/test_http_implementations.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/tests/test_bencode.py

doc/bazaar-vcs.org.kid

doc/developers/performance-contributing.txt

doc/en/developer-guide

doc/en/user-guide/index.txt

doc/es/guia-desarrollador

doc/es/notas-version

doc/es/referencia

files renamed:
bzrlib/_btree_serializer_c.pyx => bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_c.h => bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_c.pyx => bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/inventory_implementations/ => bzrlib/tests/per_inventory/

bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py => bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

bzrlib/util/bencode.py => bzrlib/util/_bencode_py.py

doc/en/quick-reference/Makefile => doc/en/_static/en/Makefile

doc/en/quick-reference/quick-start-summary.pdf => doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/quick-reference/quick-start-summary.png => doc/en/_static/en/bzr-en-quick-reference.png

doc/en/quick-reference/quick-start-summary.svg => doc/en/_static/en/bzr-en-quick-reference.svg

doc/es/referencia-rapida/Makefile => doc/es/_static/es/Makefile

doc/es/referencia-rapida/referencia-rapida.svg => doc/es/_static/es/bzr-es-quick-reference.svg

doc/index.es.txt => doc/es/index.txt

doc/es/referencia-rapida/ => doc/es/quick-reference/

doc/es/guia-usuario/ => doc/es/user-guide/

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

BRANCH.TODO

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_readdir_pyx.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/counted_lock.py

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/dir_exporter.py

bzrlib/fetch.py

bzrlib/filters/__init__.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/version.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/dirstate.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/performance-roadmap.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/server.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

doc/es/user-guide/index.txt

doc/index.txt

setup.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/win32/build_release.py

tools/win32/bzr.iss.cog

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

"""Core compression logic for compressing streams of related files."""

from itertools import izip

from cStringIO import StringIO

import time

import zlib

try:

from bzrlib import (

annotate,

debug,

diff,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

patiencediff,

trace,

)

from bzrlib.graph import Graph

from bzrlib.knit import _DirectPackAccess

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

VersionedFiles,

)

# Minimum number of uncompressed bytes to try fetch at once when retrieving

# groupcompress blocks.

BATCH_SIZE = 2**16

_USE_LZMA = False and (pylzma is not None)

# osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):

"""Sort and group the keys in parent_map into groupcompress order.

# groupcompress ordering is approximately reverse topological,

# properly grouped by file-id.

per_prefix_map = {}

for item in parent_map.iteritems():

key = item[0]

for key, value in parent_map.iteritems():

if isinstance(key, str) or len(key) == 1:

prefix = ''

else:

prefix = key[0]

try:

per_prefix_map[prefix].append(item)

per_prefix_map[prefix][key] = value

except KeyError:

per_prefix_map[prefix] = [item]

per_prefix_map[prefix] = {key: value}

present_keys = []

for prefix in sorted(per_prefix_map):

# Group Compress Block v1 Zlib

GCB_HEADER = 'gcb1z\n'

# Group Compress Block v1 Lzma

GCB_LZ_HEADER = 'gcb1l\n'

GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

100

101

100

def __init__(self):

102

101

# map by key? or just order in file?

106

105

self._z_content_length = None

107

106

self._content_length = None

108

107

self._content = None

108

self._content_chunks = None

109

110

def __len__(self):

111

# This is the maximum number of bytes this object will reference if

128

# _z_content because of this.

129

if num_bytes is None:

130

num_bytes = self._content_length

131

if self._content_length is not None:

132

assert num_bytes <= self._content_length

133

if self._content is None:

134

assert self._z_content is not None

131

elif (self._content_length is not None

132

and num_bytes > self._content_length):

133

raise AssertionError(

134

'requested num_bytes (%d) > content length (%d)'

135

% (num_bytes, self._content_length))

136

# Expand the content if required

137

if self._content is None:

138

if self._content_chunks is not None:

139

self._content = ''.join(self._content_chunks)

140

self._content_chunks = None

141

if self._content is None:

142

if self._z_content is None:

143

raise AssertionError('No content to decompress')

135

144

if self._z_content == '':

136

145

self._content = ''

137

146

elif self._compressor_name == 'lzma':

138

147

# We don't do partial lzma decomp yet

139

148

self._content = pylzma.decompress(self._z_content)

140

else:

149

elif self._compressor_name == 'zlib':

141

150

# Start a zlib decompressor

142

assert self._compressor_name == 'zlib'

143

151

if num_bytes is None:

144

152

self._content = zlib.decompress(self._z_content)

145

153

else:

148

156

# that the rest of the code is simplified

149

157

self._content = self._z_content_decompressor.decompress(

150

158

self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

151

# Any bytes remaining to be decompressed will be in the

152

# decompressors 'unconsumed_tail'

159

else:

160

raise AssertionError('Unknown compressor: %r'

161

% self._compressor_name)

162

# Any bytes remaining to be decompressed will be in the decompressors

163

# 'unconsumed_tail'

164

153

165

# Do we have enough bytes already?

154

166

if num_bytes is not None and len(self._content) >= num_bytes:

155

167

return

157

169

# We must have already decompressed everything

158

170

return

159

171

# If we got this far, and don't have a decompressor, something is wrong

160

assert self._z_content_decompressor is not None

172

if self._z_content_decompressor is None:

173

raise AssertionError(

174

'No decompressor to decompress %d bytes' % num_bytes)

161

175

remaining_decomp = self._z_content_decompressor.unconsumed_tail

162

176

if num_bytes is None:

163

177

if remaining_decomp:

164

178

# We don't know how much is left, but we'll decompress it all

165

179

self._content += self._z_content_decompressor.decompress(

166

180

remaining_decomp)

167

# Note: There what I consider a bug in zlib.decompressobj

181

# Note: There's what I consider a bug in zlib.decompressobj

168

182

# If you pass back in the entire unconsumed_tail, only

169

183

# this time you don't pass a max-size, it doesn't

170

184

# change the unconsumed_tail back to None/''.

171

185

# However, we know we are done with the whole stream

172

186

self._z_content_decompressor = None

187

# XXX: Why is this the only place in this routine we set this?

173

188

self._content_length = len(self._content)

174

189

else:

175

# If we have nothing left to decomp, we ran out of decomp bytes

176

assert remaining_decomp

190

if not remaining_decomp:

191

raise AssertionError('Nothing left to decompress')

177

192

needed_bytes = num_bytes - len(self._content)

178

193

# We always set max_size to 32kB over the minimum needed, so that

179

194

# zlib will give us as much as we really want.

181

196

# that keeps expanding the request until we get enough

182

197

self._content += self._z_content_decompressor.decompress(

183

198

remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)

184

assert len(self._content) >= num_bytes

199

if len(self._content) < num_bytes:

200

raise AssertionError('%d bytes wanted, only %d available'

201

% (num_bytes, len(self._content)))

185

202

if not self._z_content_decompressor.unconsumed_tail:

186

203

# The stream is finished

187

204

self._z_content_decompressor = None

202

219

pos2 = bytes.index('\n', pos, pos + 14)

203

220

self._content_length = int(bytes[pos:pos2])

204

221

pos = pos2 + 1

205

assert len(bytes) == (pos + self._z_content_length)

222

if len(bytes) != (pos + self._z_content_length):

223

# XXX: Define some GCCorrupt error ?

224

raise AssertionError('Invalid bytes: (%d) != %d + %d' %

225

(len(bytes), pos, self._z_content_length))

206

226

self._z_content = bytes[pos:]

207

assert len(self._z_content) == self._z_content_length

208

227

209

228

@classmethod

210

229

def from_bytes(cls, bytes):

211

230

out = cls()

212

if bytes[:6] not in (cls.GCB_HEADER, cls.GCB_LZ_HEADER):

213

raise ValueError('bytes did not start with %r' % (cls.GCB_HEADER,))

231

if bytes[:6] not in cls.GCB_KNOWN_HEADERS:

232

raise ValueError('bytes did not start with any of %r'

233

% (cls.GCB_KNOWN_HEADERS,))

234

# XXX: why not testing the whole header ?

214

235

if bytes[4] == 'z':

215

236

out._compressor_name = 'zlib'

216

237

elif bytes[4] == 'l':

254

275

bytes = apply_delta_to_source(self._content, content_start, end)

255

276

return bytes

256

277

278

def set_chunked_content(self, content_chunks, length):

279

"""Set the content of this block to the given chunks."""

280

# If we have lots of short lines, it may be more efficient to join

281

# the content ahead of time. If the content is <10MiB, we don't really

282

# care about the extra memory consumption, so we can just pack it and

283

# be done. However, timing showed 18s => 17.9s for repacking 1k revs of

284

# mysql, which is below the noise margin

285

self._content_length = length

286

self._content_chunks = content_chunks

287

self._content = None

288

self._z_content = None

289

257

290

def set_content(self, content):

258

291

"""Set the content of this block."""

259

292

self._content_length = len(content)

260

293

self._content = content

261

294

self._z_content = None

262

295

296

def _create_z_content_using_lzma(self):

297

if self._content_chunks is not None:

298

self._content = ''.join(self._content_chunks)

299

self._content_chunks = None

300

if self._content is None:

301

raise AssertionError('Nothing to compress')

302

self._z_content = pylzma.compress(self._content)

303

self._z_content_length = len(self._z_content)

304

305

def _create_z_content_from_chunks(self):

306

compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)

307

compressed_chunks = map(compressor.compress, self._content_chunks)

308

compressed_chunks.append(compressor.flush())

309

self._z_content = ''.join(compressed_chunks)

310

self._z_content_length = len(self._z_content)

311

312

def _create_z_content(self):

313

if self._z_content is not None:

314

return

315

if _USE_LZMA:

316

self._create_z_content_using_lzma()

317

return

318

if self._content_chunks is not None:

319

self._create_z_content_from_chunks()

320

return

321

self._z_content = zlib.compress(self._content)

322

self._z_content_length = len(self._z_content)

323

263

324

def to_bytes(self):

264

325

"""Encode the information into a byte stream."""

265

compress = zlib.compress

266

if _USE_LZMA:

267

compress = pylzma.compress

268

if self._z_content is None:

269

assert self._content is not None

270

self._z_content = compress(self._content)

271

self._z_content_length = len(self._z_content)

326

self._create_z_content()

272

327

if _USE_LZMA:

273

328

header = self.GCB_LZ_HEADER

274

329

else:

279

334

]

280

335

return ''.join(chunks)

281

336

337

def _dump(self, include_text=False):
    """Describe the records of this block as a human-readable structure.

    :param include_text: If True, fulltext and copy entries also carry
        the text they refer to, and insert entries carry the inserted
        text. If False, fulltext and copy entries leave the text out and
        insert entries carry an empty string.
    :return: A list with one entry per record. The layout is something
        like:
        [('f', length), ('d', delta_length, text_length, [delta_info])]
        delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
        ...]
    :raises ValueError: If a record starts with an unknown kind character,
        claims to run past the end of the content, or is a delta whose
        instructions do not match the lengths it declares.
    """
    self._ensure_content()
    content = self._content
    total_length = self._content_length
    records = []
    cursor = 0
    while cursor < total_length:
        record_start = cursor
        kind = content[cursor]
        cursor += 1
        if kind not in ('f', 'd'):
            raise ValueError('invalid kind character: %r' % (kind,))
        content_len, len_len = decode_base128_int(
            content[cursor:cursor + 5])
        cursor += len_len
        if content_len + cursor > total_length:
            raise ValueError('invalid content_len %d for record @ pos %d'
                             % (content_len, record_start))
        body = content[cursor:cursor + content_len]
        cursor += content_len
        if kind == 'f':
            # Fulltext
            if include_text:
                records.append(('f', content_len, body))
            else:
                records.append(('f', content_len))
            continue
        # Delta. The first entry in a delta is the decompressed length.
        decomp_len, delta_pos = decode_base128_int(body)
        delta_info = []
        measured_len = 0
        while delta_pos < content_len:
            cmd = ord(body[delta_pos])
            delta_pos += 1
            if cmd & 0x80:
                # Copy: the offset is relative to the whole block content.
                offset, length, delta_pos = decode_copy_instruction(
                    body, cmd, delta_pos)
                entry = ('c', offset, length)
                if include_text:
                    entry += (content[offset:offset + length],)
                delta_info.append(entry)
                measured_len += length
            else:
                # Insert: the command byte is the number of literal bytes.
                inserted = ''
                if include_text:
                    inserted = body[delta_pos:delta_pos + cmd]
                delta_info.append(('i', cmd, inserted))
                measured_len += cmd
                delta_pos += cmd
        if delta_pos != content_len:
            raise ValueError('Delta consumed a bad number of bytes:'
                             ' %d != %d' % (delta_pos, content_len))
        if measured_len != decomp_len:
            raise ValueError('Delta claimed fulltext was %d bytes, but'
                             ' extraction resulted in %d bytes'
                             % (decomp_len, measured_len))
        records.append(('d', content_len, decomp_len, delta_info))
    return records

404

282

405

283

406

class _LazyGroupCompressFactory(object):

284

407

"""Yield content from a GroupCompressBlock on demand."""

331

454

self._manager._prepare_for_extract()

332

455

block = self._manager._block

333

456

self._bytes = block.extract(self.key, self._start, self._end)

334

# XXX: It seems the smart fetch extracts inventories and chk

335

# pages as fulltexts to find the next chk pages, but then

336

# passes them down to be inserted as a

337

# groupcompress-block, so this is not safe to do. Perhaps

338

# we could just change the storage kind to "fulltext" at

339

# that point?

340

# self._manager = None

457

# There are code paths that first extract as fulltext, and then

458

# extract as storage_kind (smart fetch). So we don't break the

459

# refcycle here, but instead in manager.get_record_stream()

341

460

if storage_kind == 'fulltext':

342

461

return self._bytes

343

462

else:

349

468

class _LazyGroupContentManager(object):

350

469

"""This manages a group of _LazyGroupCompressFactory objects."""

351

470

471

_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of

472

# current size, and still be considered

473

# reusable

474

_full_block_size = 4*1024*1024

475

_full_mixed_block_size = 2*1024*1024

476

_full_enough_block_size = 3*1024*1024 # size at which we won't repack

477

_full_enough_mixed_block_size = 2*768*1024 # 1.5MB

478

352

479

def __init__(self, block):

353

480

self._block = block

354

481

# We need to preserve the ordering

376

503

yield factory

377

504

# Break the ref-cycle

378

505

factory._bytes = None

379

# XXX: this is not safe, the smart fetch code requests the content

380

# as both a 'fulltext', and then later on as a

381

# groupcompress-block. The iter_interesting_nodes code also is

382

# still buffering multiple records and returning them later.

383

# So that code would need to be updated to either re-fetch the

384

# original object, or buffer it somehow.

385

# factory._manager = None

506

factory._manager = None

386

507

# TODO: Consider setting self._factories = None after the above loop,

387

508

# as it will break the reference cycle

388

509

406

527

end_point = 0

407

528

for factory in self._factories:

408

529

bytes = factory.get_bytes_as('fulltext')

409

(found_sha1, start_point, end_point, type,

410

length) = compressor.compress(factory.key, bytes, factory.sha1)

530

(found_sha1, start_point, end_point,

531

type) = compressor.compress(factory.key, bytes, factory.sha1)

411

532

# Now update this factory with the new offsets, etc

412

533

factory.sha1 = found_sha1

413

534

factory._start = start_point

432

553

# time (self._block._content) is a little expensive.

433

554

self._block._ensure_content(self._last_byte)

434

555

435

def _check_rebuild_block(self):

556

def _check_rebuild_action(self):

436

557

"""Check to see if our block should be repacked."""

437

558

total_bytes_used = 0

438

559

last_byte_used = 0

439

560

for factory in self._factories:

440

561

total_bytes_used += factory._end - factory._start

441

last_byte_used = max(last_byte_used, factory._end)

442

# If we are using most of the bytes from the block, we have nothing

443

# else to check (currently more than 1/2)

562

if last_byte_used < factory._end:

563

last_byte_used = factory._end

564

# If we are using more than half of the bytes from the block, we have

565

# nothing else to check

444

566

if total_bytes_used * 2 >= self._block._content_length:

445

return

446

# Can we just strip off the trailing bytes? If we are going to be

447

# transmitting more than 50% of the front of the content, go ahead

567

return None, last_byte_used, total_bytes_used

568

# We are using less than 50% of the content. Is the content we are

569

# using at the beginning of the block? If so, we can just trim the

570

# tail, rather than rebuilding from scratch.

448

571

if total_bytes_used * 2 > last_byte_used:

449

self._trim_block(last_byte_used)

450

return

572

return 'trim', last_byte_used, total_bytes_used

451

573

452

574

# We are using a small amount of the data, and it isn't just packed

453

575

# nicely at the front, so rebuild the content.

460

582

# expanding many deltas into fulltexts, as well.

461

583

# If we build a cheap enough 'strip', then we could try a strip,

462

584

# if that expands the content, we then rebuild.

463

self._rebuild_block()

585

return 'rebuild', last_byte_used, total_bytes_used

586

587

def check_is_well_utilized(self):
    """Is the current block considered 'well utilized'?

    This heuristic asks if the current block considers itself to be a
    fully developed group, rather than just a loose collection of data.

    :return: True if the block is judged full enough to be left alone,
        False otherwise.
    """
    if len(self._factories) == 1:
        # A block of length 1 could be improved by combining with other
        # groups - don't look deeper. Even larger than max size groups
        # could compress well with adjacent versions of the same thing.
        return False
    _, _, total_bytes_used = self._check_rebuild_action()
    block_size = self._block._content_length
    if total_bytes_used < block_size * self._max_cut_fraction:
        # This block wants to trim itself small enough that we want to
        # consider it under-utilized.
        return False
    # TODO: This code is meant to be the twin of _insert_record_stream's
    #       'start_new_block' logic. It would probably be better to factor
    #       out that logic into a shared location, so that it stays
    #       together better
    # We currently assume a block is properly utilized whenever it is >75%
    # of the size of a 'full' block. In normal operation, a block is
    # considered full when it hits 4MB of same-file content. So any block
    # >3MB is 'full enough'.
    # The only time this isn't true is when a given block has large-object
    # content. (a single file >4MB, etc.)
    # Under these circumstances, we allow a block to grow to
    # 2 x largest_content. Which means that if a given block had a large
    # object, it may actually be under-utilized. However, given that this
    # is 'pack-on-the-fly' it is probably reasonable to not repack large
    # content blobs on-the-fly. Note that because we return False for all
    # 1-item blobs, we will repack them; we may wish to reevaluate our
    # treatment of large object blobs in the future.
    if block_size >= self._full_enough_block_size:
        return True
    # If a block is <3MB, it still may be considered 'full' if it contains
    # mixed content. The current rule is 2MB of mixed content is considered
    # full. So look for a key prefix differing from the first one, and
    # apply the mixed-content threshold as soon as one is found.
    first_prefix = None
    for factory in self._factories:
        prefix = factory.key[:-1]
        if first_prefix is None:
            first_prefix = prefix
            continue
        if prefix != first_prefix:
            # Mixed content, check the size appropriately
            return block_size >= self._full_enough_mixed_block_size
    # The content failed both the mixed check and the single-content check
    # so obviously it is not fully utilized
    # TODO: there is one other constraint that isn't being checked
    #       namely, that the entries in the block are in the appropriate
    #       order. For example, you could insert the entries in exactly
    #       reverse groupcompress order, and we would think that is ok.
    #       (all the right objects are in one group, and it is fully
    #       utilized, etc.) For now, we assume that case is rare,
    #       especially since we should always fetch in 'groupcompress'
    #       order.
    return False

648

649

def _check_rebuild_block(self):

650

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

651

if action is None:

652

return

653

if action == 'trim':

654

self._trim_block(last_byte_used)

655

elif action == 'rebuild':

656

self._rebuild_block()

657

else:

658

raise ValueError('unknown rebuild action: %r' % (action,))

464

659

465

660

def _wire_bytes(self):

466

661

"""Return a byte stream suitable for transmitting over the wire."""

492

687

record_header = '%s\n%s\n%d\n%d\n' % (

493

688

key_bytes, parent_bytes, factory._start, factory._end)

494

689

header_lines.append(record_header)

690

# TODO: Can we break the refcycle at this point and set

691

# factory._manager = None?

495

692

header_bytes = ''.join(header_lines)

496

693

del header_lines

497

694

header_bytes_len = len(header_bytes)

593

790

:param soft: Do a 'soft' compression. This means that we require larger

594

791

ranges to match to be considered for a copy command.

595

792

596

:return: The sha1 of lines, the start and end offsets in the delta, the

597

type ('fulltext' or 'delta') and the number of bytes accumulated in

598

the group output so far.

793

:return: The sha1 of lines, the start and end offsets in the delta, and

794

the type ('fulltext' or 'delta').

599

795

600

796

:seealso VersionedFiles.add_lines:

601

797

"""

602

798

if not bytes: # empty, like a dir entry, etc

603

799

if nostore_sha == _null_sha1:

604

800

raise errors.ExistingContent()

605

return _null_sha1, 0, 0, 'fulltext', 0

801

return _null_sha1, 0, 0, 'fulltext'

606

802

# we assume someone knew what they were doing when they passed it in

607

803

if expected_sha is not None:

608

804

sha1 = expected_sha

614

810

if key[-1] is None:

615

811

key = key[:-1] + ('sha1:' + sha1,)

616

812

617

return self._compress(key, bytes, sha1, len(bytes) / 2, soft)

813

start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)

814

return sha1, start, end, type

618

815

619

def _compress(self, key, bytes, sha1, max_delta_size, soft=False):

816

def _compress(self, key, bytes, max_delta_size, soft=False):

620

817

"""Compress lines with label key.

621

818

622

819

:param key: A key tuple. It is stored in the output for identification

624

821

625

822

:param bytes: The bytes to be compressed

626

823

627

:param sha1: The sha1 for 'bytes'.

628

629

824

:param max_delta_size: The size above which we issue a fulltext instead

630

825

of a delta.

631

826

632

827

:param soft: Do a 'soft' compression. This means that we require larger

633

828

ranges to match to be considered for a copy command.

634

829

635

:return: The sha1 of lines, the start and end offsets in the delta, the

636

type ('fulltext' or 'delta') and the number of bytes accumulated in

637

the group output so far.

830

:return: The sha1 of lines, the start and end offsets in the delta, and

831

the type ('fulltext' or 'delta').

638

832

"""

639

833

raise NotImplementedError(self._compress)

640

834

676

870

677

871

After calling this, the compressor should no longer be used

678

872

"""

679

content = ''.join(self.chunks)

873

# TODO: this causes us to 'bloat' to 2x the size of content in the

874

# group. This has an impact for 'commit' of large objects.

875

# One possibility is to use self._content_chunks, and be lazy and

876

# only fill out self._content as a full string when we actually

877

# need it. That would at least drop the peak memory consumption

878

# for 'commit' down to ~1x the size of the largest file, at a

879

# cost of increased complexity within this code. 2x is still <<

880

# 3x the size of the largest file, so we are doing ok.

881

self._block.set_chunked_content(self.chunks, self.endpoint)

680

882

self.chunks = None

681

883

self._delta_index = None

682

self._block.set_content(content)

683

884

return self._block

684

885

685

886

def pop_last(self):

703

904

def __init__(self):

704

905

"""Create a GroupCompressor.

705

906

706

:param delta: If False, do not compress records.

907

Used only if the pyrex version is not available.

707

908

"""

708

909

super(PythonGroupCompressor, self).__init__()

709

910

self._delta_index = LinesDeltaIndex([])

710

911

# The actual content is managed by LinesDeltaIndex

711

912

self.chunks = self._delta_index.lines

712

913

713

def _compress(self, key, bytes, sha1, max_delta_size, soft=False):

914

def _compress(self, key, bytes, max_delta_size, soft=False):

714

915

"""see _CommonGroupCompressor._compress"""

715

bytes_length = len(bytes)

916

input_len = len(bytes)

716

917

new_lines = osutils.split_lines(bytes)

717

out_lines, index_lines = self._delta_index.make_delta(new_lines,

718

bytes_length=bytes_length, soft=soft)

918

out_lines, index_lines = self._delta_index.make_delta(

919

new_lines, bytes_length=input_len, soft=soft)

719

920

delta_length = sum(map(len, out_lines))

720

921

if delta_length > max_delta_size:

721

922

# The delta is longer than the fulltext, insert a fulltext

722

923

type = 'fulltext'

723

out_lines = ['f', encode_base128_int(bytes_length)]

924

out_lines = ['f', encode_base128_int(input_len)]

724

925

out_lines.extend(new_lines)

725

926

index_lines = [False, False]

726

927

index_lines.extend([True] * len(new_lines))

727

out_length = len(out_lines[1]) + bytes_length + 1

728

928

else:

729

929

# this is a worthy delta, output it

730

930

type = 'delta'

731

931

out_lines[0] = 'd'

732

932

# Update the delta_length to include those two encoded integers

733

933

out_lines[1] = encode_base128_int(delta_length)

734

out_length = len(out_lines[3]) + 1 + delta_length

735

start = self.endpoint # Before insertion

736

chunk_start = len(self._delta_index.lines)

934

# Before insertion

935

start = self.endpoint

936

chunk_start = len(self.chunks)

937

self._last = (chunk_start, self.endpoint)

737

938

self._delta_index.extend_lines(out_lines, index_lines)

738

939

self.endpoint = self._delta_index.endpoint

739

self.input_bytes += bytes_length

740

chunk_end = len(self._delta_index.lines)

940

self.input_bytes += input_len

941

chunk_end = len(self.chunks)

741

942

self.labels_deltas[key] = (start, chunk_start,

742

943

self.endpoint, chunk_end)

743

return sha1, start, self.endpoint, type, out_length

944

return start, self.endpoint, type

744

945

745

946

746

947

class PyrexGroupCompressor(_CommonGroupCompressor):

763

964

super(PyrexGroupCompressor, self).__init__()

764

965

self._delta_index = DeltaIndex()

765

966

766

def _compress(self, key, bytes, sha1, max_delta_size, soft=False):

967

def _compress(self, key, bytes, max_delta_size, soft=False):

767

968

"""see _CommonGroupCompressor._compress"""

768

969

input_len = len(bytes)

769

970

# By having action/label/sha1/len, we can parse the group if the index

784

985

type = 'fulltext'

785

986

enc_length = encode_base128_int(len(bytes))

786

987

len_mini_header = 1 + len(enc_length)

787

length = len(bytes) + len_mini_header

788

988

self._delta_index.add_source(bytes, len_mini_header)

789

989

new_chunks = ['f', enc_length, bytes]

790

990

else:

791

991

type = 'delta'

792

992

enc_length = encode_base128_int(len(delta))

793

993

len_mini_header = 1 + len(enc_length)

794

length = len(delta) + len_mini_header

795

994

new_chunks = ['d', enc_length, delta]

796

995

self._delta_index.add_delta_source(delta, len_mini_header)

797

996

# Before insertion

807

1006

raise AssertionError('the delta index is out of sync'

808

1007

'with the output lines %s != %s'

809

1008

% (self._delta_index._source_offset, self.endpoint))

810

return sha1, start, self.endpoint, type, length

1009

return start, self.endpoint, type

811

1010

812

1011

def _output_chunks(self, new_chunks):

813

1012

"""Output some chunks.

821

1020

self.endpoint = endpoint

822

1021

823

1022

824

def make_pack_factory(graph, delta, keylength):

1023

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):

825

1024

"""Create a factory for creating a pack based groupcompress.

826

1025

827

1026

This is only functional enough to run interface tests, it doesn't try to

842

1041

writer = pack.ContainerWriter(stream.write)

843

1042

writer.begin()

844

1043

index = _GCGraphIndex(graph_index, lambda:True, parents=parents,

845

add_callback=graph_index.add_nodes)

846

access = _DirectPackAccess({})

1044

add_callback=graph_index.add_nodes,

1045

inconsistency_fatal=inconsistency_fatal)

1046

access = knit._DirectPackAccess({})

847

1047

access.set_writer(writer, graph_index, (transport, 'newpack'))

848

1048

result = GroupCompressVersionedFiles(index, access, delta)

849

1049

result.stream = stream

857

1057

versioned_files.stream.close()

858

1058

859

1059

1060

class _BatchingBlockFetcher(object):

1061

"""Fetch group compress blocks in batches.

1062

1063

:ivar total_bytes: int of expected number of bytes needed to fetch the

1064

currently pending batch.

1065

"""

1066

1067

def __init__(self, gcvf, locations):

1068

self.gcvf = gcvf

1069

self.locations = locations

1070

self.keys = []

1071

self.batch_memos = {}

1072

self.memos_to_get = []

1073

self.total_bytes = 0

1074

self.last_read_memo = None

1075

self.manager = None

1076

1077

def add_key(self, key):

1078

"""Add another to key to fetch.

1079

1080

:return: The estimated number of bytes needed to fetch the batch so

1081

far.

1082

"""

1083

self.keys.append(key)

1084

index_memo, _, _, _ = self.locations[key]

1085

read_memo = index_memo[0:3]

1086

# Three possibilities for this read_memo:

1087

# - it's already part of this batch; or

1088

# - it's not yet part of this batch, but is already cached; or

1089

# - it's not yet part of this batch and will need to be fetched.

1090

if read_memo in self.batch_memos:

1091

# This read memo is already in this batch.

1092

return self.total_bytes

1093

try:

1094

cached_block = self.gcvf._group_cache[read_memo]

1095

except KeyError:

1096

# This read memo is new to this batch, and the data isn't cached

1097

# either.

1098

self.batch_memos[read_memo] = None

1099

self.memos_to_get.append(read_memo)

1100

byte_length = read_memo[2]

1101

self.total_bytes += byte_length

1102

else:

1103

# This read memo is new to this batch, but cached.

1104

# Keep a reference to the cached block in batch_memos because it's

1105

# certain that we'll use it when this batch is processed, but

1106

# there's a risk that it would fall out of _group_cache between now

1107

# and then.

1108

self.batch_memos[read_memo] = cached_block

1109

return self.total_bytes

1110

1111

def _flush_manager(self):

1112

if self.manager is not None:

1113

for factory in self.manager.get_record_stream():

1114

yield factory

1115

self.manager = None

1116

self.last_read_memo = None

1117

1118

def yield_factories(self, full_flush=False):

1119

"""Yield factories for keys added since the last yield. They will be

1120

returned in the order they were added via add_key.

1121

1122

:param full_flush: by default, some results may not be returned in case

1123

they can be part of the next batch. If full_flush is True, then

1124

all results are returned.

1125

"""

1126

if self.manager is None and not self.keys:

1127

return

1128

# Fetch all memos in this batch.

1129

blocks = self.gcvf._get_blocks(self.memos_to_get)

1130

# Turn blocks into factories and yield them.

1131

memos_to_get_stack = list(self.memos_to_get)

1132

memos_to_get_stack.reverse()

1133

for key in self.keys:

1134

index_memo, _, parents, _ = self.locations[key]

1135

read_memo = index_memo[:3]

1136

if self.last_read_memo != read_memo:

1137

# We are starting a new block. If we have a

1138

# manager, we have found everything that fits for

1139

# now, so yield records

1140

for factory in self._flush_manager():

1141

yield factory

1142

# Now start a new manager.

1143

if memos_to_get_stack and memos_to_get_stack[-1] == read_memo:

1144

# The next block from _get_blocks will be the block we

1145

# need.

1146

block_read_memo, block = blocks.next()

1147

if block_read_memo != read_memo:

1148

raise AssertionError(

1149

"block_read_memo out of sync with read_memo"

1150

"(%r != %r)" % (block_read_memo, read_memo))

1151

self.batch_memos[read_memo] = block

1152

memos_to_get_stack.pop()

1153

else:

1154

block = self.batch_memos[read_memo]

1155

self.manager = _LazyGroupContentManager(block)

1156

self.last_read_memo = read_memo

1157

start, end = index_memo[3:5]

1158

self.manager.add_factory(key, parents, start, end)

1159

if full_flush:

1160

for factory in self._flush_manager():

1161

yield factory

1162

del self.keys[:]

1163

self.batch_memos.clear()

1164

del self.memos_to_get[:]

1165

self.total_bytes = 0

1166

1167

860

1168

class GroupCompressVersionedFiles(VersionedFiles):

861

1169

"""A group-compress based VersionedFiles implementation."""

862

1170

863

def __init__(self, index, access, delta=True):

1171

def __init__(self, index, access, delta=True, _unadded_refs=None):

864

1172

"""Create a GroupCompressVersionedFiles object.

865

1173

866

1174

:param index: The index object storing access and graph data.

867

1175

:param access: The access object storing raw data.

868

1176

:param delta: Whether to delta compress or just entropy compress.

1177

:param _unadded_refs: private parameter, don't use.

869

1178

"""

870

1179

self._index = index

871

1180

self._access = access

872

1181

self._delta = delta

873

self._unadded_refs = {}

1182

if _unadded_refs is None:

1183

_unadded_refs = {}

1184

self._unadded_refs = _unadded_refs

874

1185

self._group_cache = LRUSizeCache(max_size=50*1024*1024)

875

1186

self._fallback_vfs = []

876

1187

1188

def without_fallbacks(self):
    """Return a clone of this object without any fallbacks configured."""
    # The clone gets its own copy of the unadded refs; the index, access
    # and delta setting are passed through as they are.
    unadded_refs = dict(self._unadded_refs)
    return GroupCompressVersionedFiles(
        self._index, self._access, self._delta,
        _unadded_refs=unadded_refs)

1192

877

1193

def add_lines(self, key, parents, lines, parent_texts=None,

878

1194

left_matching_blocks=None, nostore_sha=None, random_id=False,

879

1195

check_content=True):

924

1240

nostore_sha=nostore_sha))[0]

925

1241

return sha1, length, None

926

1242

1243

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text()."""
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    record = FulltextContentFactory(key, parents, None, text)
    inserted = self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha)
    sha1 = list(inserted)[0]
    return sha1, len(text), None

1260

927

1261

def add_fallback_versioned_files(self, a_versioned_files):

928

1262

"""Add a source of texts for texts not present in this knit.

929

1263

933

1267

934

1268

def annotate(self, key):

935

1269

"""See VersionedFiles.annotate."""

936

graph = Graph(self)

937

parent_map = self.get_parent_map([key])

938

if not parent_map:

939

raise errors.RevisionNotPresent(key, self)

940

if parent_map[key] is not None:

941

search = graph._make_breadth_first_searcher([key])

942

keys = set()

943

while True:

944

try:

945

present, ghosts = search.next_with_ghosts()

946

except StopIteration:

947

break

948

keys.update(present)

949

parent_map = self.get_parent_map(keys)

950

else:

951

keys = [key]

952

parent_map = {key:()}

953

head_cache = _mod_graph.FrozenHeadsCache(graph)

954

parent_cache = {}

955

reannotate = annotate.reannotate

956

for record in self.get_record_stream(keys, 'topological', True):

957

key = record.key

958

chunks = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

959

parent_lines = [parent_cache[parent] for parent in parent_map[key]]

960

parent_cache[key] = list(

961

reannotate(parent_lines, chunks, key, None, head_cache))

962

return parent_cache[key]

963

964

def check(self, progress_bar=None):

1270

ann = annotate.Annotator(self)

1271

return ann.annotate_flat(key)

1272

1273

def get_annotator(self):
    """Return an annotate.Annotator constructed around this object."""
    annotator = annotate.Annotator(self)
    return annotator

1275

1276

def check(self, progress_bar=None, keys=None):

965

1277

"""See VersionedFiles.check()."""

966

keys = self.keys()

967

for record in self.get_record_stream(keys, 'unordered', True):

968

record.get_bytes_as('fulltext')

1278

if keys is None:

1279

keys = self.keys()

1280

for record in self.get_record_stream(keys, 'unordered', True):

1281

record.get_bytes_as('fulltext')

1282

else:

1283

return self.get_record_stream(keys, 'unordered', True)

969

1284

970

1285

def _check_add(self, key, lines, random_id, check_content):

971

1286

"""check that version_id and lines are safe to add."""

982

1297

self._check_lines_not_unicode(lines)

983

1298

self._check_lines_are_lines(lines)

984

1299

1300

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys."""
    # Note that this is identical to
    # KnitVersionedFiles.get_known_graph_ancestry, but they don't share
    # ancestry.
    parent_map, missing_keys = self._index.find_ancestry(keys)
    for fallback in self._fallback_vfs:
        if not missing_keys:
            break
        # Ask each fallback only for what is still missing, and carry on
        # with whatever that fallback could not find either.
        found_parents, still_missing = fallback._index.find_ancestry(
            missing_keys)
        parent_map.update(found_parents)
        missing_keys = still_missing
    return _mod_graph.KnownGraph(parent_map)

1315

985

1316

def get_parent_map(self, keys):

986

1317

"""Get a map of the graph parents of keys.

987

1318

1014

1345

missing.difference_update(set(new_result))

1015

1346

return result, source_results

1016

1347

1017

def _get_block(self, index_memo):

1018

read_memo = index_memo[0:3]

1019

# get the group:

1020

try:

1021

block = self._group_cache[read_memo]

1022

except KeyError:

1023

# read the group

1024

zdata = self._access.get_raw_records([read_memo]).next()

1025

# decompress - whole thing - this is not a bug, as it

1026

# permits caching. We might want to store the partially

1027

# decompressed group and decompress object, so that recent

1028

# texts are not penalised by big groups.

1029

block = GroupCompressBlock.from_bytes(zdata)

1030

self._group_cache[read_memo] = block

1031

# cheapo debugging:

1032

# print len(zdata), len(plain)

1033

# parse - requires split_lines, better to have byte offsets

1034

# here (but not by much - we only split the region for the

1035

# recipe, and we often want to end up with lines anyway.

1036

return block

1348

def _get_blocks(self, read_memos):

1349

"""Get GroupCompressBlocks for the given read_memos.

1350

1351

:returns: a series of (read_memo, block) pairs, in the order they were

1352

originally passed.

1353

"""

1354

cached = {}

1355

for read_memo in read_memos:

1356

try:

1357

block = self._group_cache[read_memo]

1358

except KeyError:

1359

pass

1360

else:

1361

cached[read_memo] = block

1362

not_cached = []

1363

not_cached_seen = set()

1364

for read_memo in read_memos:

1365

if read_memo in cached:

1366

# Don't fetch what we already have

1367

continue

1368

if read_memo in not_cached_seen:

1369

# Don't try to fetch the same data twice

1370

continue

1371

not_cached.append(read_memo)

1372

not_cached_seen.add(read_memo)

1373

raw_records = self._access.get_raw_records(not_cached)

1374

for read_memo in read_memos:

1375

try:

1376

yield read_memo, cached[read_memo]

1377

except KeyError:

1378

# Read the block, and cache it.

1379

zdata = raw_records.next()

1380

block = GroupCompressBlock.from_bytes(zdata)

1381

self._group_cache[read_memo] = block

1382

cached[read_memo] = block

1383

yield read_memo, block

1037

1384

1038

1385

def get_missing_compression_parent_keys(self):

1039

1386

"""Return the keys of missing compression parents.

1205

1552

unadded_keys, source_result)

1206

1553

for key in missing:

1207

1554

yield AbsentContentFactory(key)

1208

manager = None

1209

last_read_memo = None

1210

# TODO: This works fairly well at batching up existing groups into a

1211

# streamable format, and possibly allowing for taking one big

1212

# group and splitting it when it isn't fully utilized.

1213

# However, it doesn't allow us to find under-utilized groups and

1214

# combine them into a bigger group on the fly.

1215

# (Consider the issue with how chk_map inserts texts

1216

# one-at-a-time.) This could be done at insert_record_stream()

1217

# time, but it probably would decrease the number of

1218

# bytes-on-the-wire for fetch.

1555

# Batch up as many keys as we can until either:

1556

# - we encounter an unadded ref, or

1557

# - we run out of keys, or

1558

# - the total bytes to retrieve for this batch > BATCH_SIZE

1559

batcher = _BatchingBlockFetcher(self, locations)

1219

1560

for source, keys in source_keys:

1220

1561

if source is self:

1221

1562

for key in keys:

1222

1563

if key in self._unadded_refs:

1223

if manager is not None:

1224

for factory in manager.get_record_stream():

1225

yield factory

1226

last_read_memo = manager = None

1564

# Flush batch, then yield unadded ref from

1565

# self._compressor.

1566

for factory in batcher.yield_factories(full_flush=True):

1567

yield factory

1227

1568

bytes, sha1 = self._compressor.extract(key)

1228

1569

parents = self._unadded_refs[key]

1229

1570

yield FulltextContentFactory(key, parents, sha1, bytes)

1230

else:

1231

index_memo, _, parents, (method, _) = locations[key]

1232

read_memo = index_memo[0:3]

1233

if last_read_memo != read_memo:

1234

# We are starting a new block. If we have a

1235

# manager, we have found everything that fits for

1236

# now, so yield records

1237

if manager is not None:

1238

for factory in manager.get_record_stream():

1239

yield factory

1240

# Now start a new manager

1241

block = self._get_block(index_memo)

1242

manager = _LazyGroupContentManager(block)

1243

last_read_memo = read_memo

1244

start, end = index_memo[3:5]

1245

manager.add_factory(key, parents, start, end)

1571

continue

1572

if batcher.add_key(key) > BATCH_SIZE:

1573

# Ok, this batch is big enough. Yield some results.

1574

for factory in batcher.yield_factories():

1575

yield factory

1246

1576

else:

1247

if manager is not None:

1248

for factory in manager.get_record_stream():

1249

yield factory

1250

last_read_memo = manager = None

1577

for factory in batcher.yield_factories(full_flush=True):

1578

yield factory

1251

1579

for record in source.get_record_stream(keys, ordering,

1252

1580

include_delta_closure):

1253

1581

yield record

1254

if manager is not None:

1255

for factory in manager.get_record_stream():

1256

yield factory

1582

for factory in batcher.yield_factories(full_flush=True):

1583

yield factory

1257

1584

1258

1585

def get_sha1s(self, keys):

1259

1586

"""See VersionedFiles.get_sha1s()."""

1274

1601

:return: None

1275

1602

:seealso VersionedFiles.get_record_stream:

1276

1603

"""

1277

for _ in self._insert_record_stream(stream):

1604

# XXX: Setting random_id=True makes

1605

# test_insert_record_stream_existing_keys fail for groupcompress and

1606

# groupcompress-nograph, this needs to be revisited while addressing

1607

# 'bzr branch' performance issues.

1608

for _ in self._insert_record_stream(stream, random_id=False):

1278

1609

pass

1279

1610

1280

1611

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,

1321

1652

self._compressor = GroupCompressor()

1322

1653

1323

1654

last_prefix = None

1324

last_fulltext_len = None

1325

1655

max_fulltext_len = 0

1326

1656

max_fulltext_prefix = None

1327

1657

insert_manager = None

1328

1658

block_start = None

1329

1659

block_length = None

1660

# XXX: TODO: remove this, it is just for safety checking for now

1661

inserted_keys = set()

1662

reuse_this_block = reuse_blocks

1330

1663

for record in stream:

1331

1664

# Raise an error when a record is missing.

1332

1665

if record.storage_kind == 'absent':

1333

1666

raise errors.RevisionNotPresent(record.key, self)

1667

if random_id:

1668

if record.key in inserted_keys:

1669

trace.note('Insert claimed random_id=True,'

1670

' but then inserted %r two times', record.key)

1671

continue

1672

inserted_keys.add(record.key)

1334

1673

if reuse_blocks:

1335

1674

# If the reuse_blocks flag is set, check to see if we can just

1336

1675

# copy a groupcompress block as-is.

1676

# We only check on the first record (groupcompress-block) not

1677

# on all of the (groupcompress-block-ref) entries.

1678

# The reuse_this_block flag is then kept for as long as

1679

if record.storage_kind == 'groupcompress-block':

1680

# Check to see if we really want to re-use this block

1681

insert_manager = record._manager

1682

reuse_this_block = insert_manager.check_is_well_utilized()

1683

else:

1684

reuse_this_block = False

1685

if reuse_this_block:

1686

# We still want to reuse this block

1337

1687

if record.storage_kind == 'groupcompress-block':

1338

1688

# Insert the raw block into the target repo

1339

1689

insert_manager = record._manager

1340

insert_manager._check_rebuild_block()

1341

1690

bytes = record._manager._block.to_bytes()

1342

1691

_, start, length = self._access.add_raw_records(

1343

1692

[(None, len(bytes))], bytes)[0]

1346

1695

block_length = length

1347

1696

if record.storage_kind in ('groupcompress-block',

1348

1697

'groupcompress-block-ref'):

1349

assert insert_manager is not None

1350

assert record._manager is insert_manager

1698

if insert_manager is None:

1699

raise AssertionError('No insert_manager set')

1700

if insert_manager is not record._manager:

1701

raise AssertionError('insert_manager does not match'

1702

' the current record, we cannot be positive'

1703

' that the appropriate content was inserted.'

1704

)

1351

1705

value = "%d %d %d %d" % (block_start, block_length,

1352

1706

record._start, record._end)

1353

1707

nodes = [(record.key, value, (record.parents,))]

1371

1725

if max_fulltext_len < len(bytes):

1372

1726

max_fulltext_len = len(bytes)

1373

1727

max_fulltext_prefix = prefix

1374

(found_sha1, start_point, end_point, type,

1375

length) = self._compressor.compress(record.key,

1376

bytes, record.sha1, soft=soft,

1377

nostore_sha=nostore_sha)

1378

# delta_ratio = float(len(bytes)) / length

1728

(found_sha1, start_point, end_point,

1729

type) = self._compressor.compress(record.key,

1730

bytes, record.sha1, soft=soft,

1731

nostore_sha=nostore_sha)

1732

# delta_ratio = float(len(bytes)) / (end_point - start_point)

1379

1733

# Check if we want to continue to include that text

1380

1734

if (prefix == max_fulltext_prefix

1381

1735

and end_point < 2 * max_fulltext_len):

1394

1748

self._compressor.pop_last()

1395

1749

flush()

1396

1750

max_fulltext_len = len(bytes)

1397

(found_sha1, start_point, end_point, type,

1398

length) = self._compressor.compress(record.key,

1399

bytes, record.sha1)

1400

last_fulltext_len = length

1751

(found_sha1, start_point, end_point,

1752

type) = self._compressor.compress(record.key, bytes,

1753

record.sha1)

1401

1754

if record.key[-1] is None:

1402

1755

key = record.key[:-1] + ('sha1:' + found_sha1,)

1403

1756

else:

1431

1784

1432

1785

:return: An iterator over (line, key).

1433

1786

"""

1434

if pb is None:

1435

pb = progress.DummyProgress()

1436

1787

keys = set(keys)

1437

1788

total = len(keys)

1438

1789

# we don't care about inclusions, the caller cares.

1442

1793

'unordered', True)):

1443

1794

# XXX: todo - optimise to use less than full texts.

1444

1795

key = record.key

1445

pb.update('Walking content', key_idx, total)

1796

if pb is not None:

1797

pb.update('Walking content', key_idx, total)

1446

1798

if record.storage_kind == 'absent':

1447

1799

raise errors.RevisionNotPresent(key, self)

1448

1800

lines = osutils.split_lines(record.get_bytes_as('fulltext'))

1449

1801

for line in lines:

1450

1802

yield line, key

1451

pb.update('Walking content', total, total)

1803

if pb is not None:

1804

pb.update('Walking content', total, total)

1452

1805

1453

1806

def keys(self):

1454

1807

"""See VersionedFiles.keys."""

1465

1818

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1466

1819

1467

1820

def __init__(self, graph_index, is_locked, parents=True,

1468

add_callback=None):

1821

add_callback=None, track_external_parent_refs=False,

1822

inconsistency_fatal=True, track_new_keys=False):

1469

1823

"""Construct a _GCGraphIndex on a graph_index.

1470

1824

1471

1825

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1476

1830

:param add_callback: If not None, allow additions to the index and call

1477

1831

this callback with a list of added GraphIndex nodes:

1478

1832

[(node, value, node_refs), ...]

1833

:param track_external_parent_refs: As keys are added, keep track of the

1834

keys they reference, so that we can query get_missing_parents(),

1835

etc.

1836

:param inconsistency_fatal: When asked to add records that are already

1837

present, and the details are inconsistent with the existing

1838

record, raise an exception instead of warning (and skipping the

1839

record).

1479

1840

"""

1480

1841

self._add_callback = add_callback

1481

1842

self._graph_index = graph_index

1482

1843

self._parents = parents

1483

1844

self.has_graph = parents

1484

1845

self._is_locked = is_locked

1846

self._inconsistency_fatal = inconsistency_fatal

1847

if track_external_parent_refs:

1848

self._key_dependencies = knit._KeyRefs(

1849

track_new_keys=track_new_keys)

1850

else:

1851

self._key_dependencies = None

1485

1852

1486

1853

def add_records(self, records, random_id=False):

1487

1854

"""Add multiple records to the index.

1508

1875

if refs:

1509

1876

for ref in refs:

1510

1877

if ref:

1511

raise KnitCorrupt(self,

1878

raise errors.KnitCorrupt(self,

1512

1879

"attempt to add node with parents "

1513

1880

"in parentless index.")

1514

1881

refs = ()

1519

1886

present_nodes = self._get_entries(keys)

1520

1887

for (index, key, value, node_refs) in present_nodes:

1521

1888

if node_refs != keys[key][1]:

1522

raise errors.KnitCorrupt(self, "inconsistent details in add_records"

1523

": %s %s" % ((value, node_refs), keys[key]))

1889

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1890

if self._inconsistency_fatal:

1891

raise errors.KnitCorrupt(self, "inconsistent details"

1892

" in add_records: %s" %

1893

details)

1894

else:

1895

trace.warning("inconsistent details in skipped"

1896

" record: %s", details)

1524

1897

del keys[key]

1525

1898

changed = True

1526

1899

if changed:

1532

1905

for key, (value, node_refs) in keys.iteritems():

1533

1906

result.append((key, value))

1534

1907

records = result

1908

key_dependencies = self._key_dependencies

1909

if key_dependencies is not None:

1910

if self._parents:

1911

for key, value, refs in records:

1912

parents = refs[0]

1913

key_dependencies.add_references(key, parents)

1914

else:

1915

for key, value, refs in records:

1916

new_keys.add_key(key)

1535

1917

self._add_callback(records)

1536

1918

1537

1919

def _check_read(self):

1566

1948

if check_present:

1567

1949

missing_keys = keys.difference(found_keys)

1568

1950

if missing_keys:

1569

raise RevisionNotPresent(missing_keys.pop(), self)

1951

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1952

1953

def find_ancestry(self, keys):
    """Delegate to the wrapped graph index; see CombinedGraphIndex.find_ancestry.

    :param keys: Passed through unchanged to the underlying index.
    :return: Whatever self._graph_index.find_ancestry() returns.
    """
    graph_index = self._graph_index
    # The second argument is always 0 here.
    # NOTE(review): presumably this selects the first reference list
    # (the parents) - confirm against CombinedGraphIndex.find_ancestry.
    return graph_index.find_ancestry(keys, 0)

1570

1956

1571

1957

def get_parent_map(self, keys):

1572

1958

"""Get a map of the parents of keys.

1586

1972

result[node[1]] = None

1587

1973

return result

1588

1974

1975

def get_missing_parents(self):
    """Return the keys of missing parents.

    The references recorded as unsatisfied in self._key_dependencies are
    first checked with self.get_parent_map(); whatever that reports as
    present is marked as satisfied before the remaining set is returned.

    :return: A frozenset of the references that are still unsatisfied.
    """
    # Copied from _KnitGraphIndex.get_missing_parents
    key_deps = self._key_dependencies
    # We may have false positives, so filter those out.
    candidates = key_deps.get_unsatisfied_refs()
    present = self.get_parent_map(candidates)
    key_deps.satisfy_refs_for_keys(present)
    still_missing = key_deps.get_unsatisfied_refs()
    return frozenset(still_missing)

1982

1589

1983

def get_build_details(self, keys):

1590

1984

"""Get the various build details for keys.

1591

1985

1637

2031

delta_end = int(bits[3])

1638

2032

return node[0], start, stop, basis_end, delta_end

1639

2033

2034

def scan_unvalidated_index(self, graph_index):
    """Tell this _GCGraphIndex about an index that is not yet validated.

    Every entry of graph_index is fed to the key dependency tracker, so
    that parents which still need to be filled in to make the index valid
    (and any new keys) can be tracked. Nothing is done when this index was
    built without a dependency tracker.

    :param graph_index: A GraphIndex
    """
    if self._key_dependencies is None:
        return
    record_references = self._key_dependencies.add_references
    for entry in graph_index.iter_all_entries():
        # Add parent refs from graph_index (and discard parent refs
        # that the graph_index has).
        # entry[1] is passed as the key and entry[3][0] as its parents.
        record_references(entry[1], entry[3][0])

2050

1640

2051

1641

2052

from bzrlib._groupcompress_py import (

1642

2053

apply_delta,

1643

2054

apply_delta_to_source,

1644

2055

encode_base128_int,

1645

2056

decode_base128_int,

2057

decode_copy_instruction,

1646

2058

LinesDeltaIndex,

1647

2059

)

1648

2060

try:

1654

2066

decode_base128_int,

1655

2067

)

1656

2068

GroupCompressor = PyrexGroupCompressor

1657

except ImportError:

2069

except ImportError, e:

2070

osutils.failed_to_load_extension(e)

1658

2071

GroupCompressor = PythonGroupCompressor

1659

2072

Older »