~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/groupcompress.py

Committer: John Arbash Meinel
Date: 2010-05-11 10:45:26 UTC
mto: This revision was merged to the branch mainline in revision 5225.
Revision ID: john@arbash-meinel.com-20100511104526-zxnstcxta22hzw2n

Implement a compiled extension for parsing the text key out of a CHKInventory value.

Related to bug #562666. This seems to shave 5-10% off the time spent doing a complete
branch of bzr.dev/launchpad/etc.

files added:
.testr.conf

MANIFEST.in

NEWS-template.txt

README_BDIST_RPM

apport

apport/README

apport/bzr-crashdb.conf

apport/source_bzr.py

bzrlib/_export_c_api.h

bzrlib/_import_c_api.h

bzrlib/_simple_set_pyx.pxd

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/_static_tuple_py.py

bzrlib/cleanup.py

bzrlib/cmdline.py

bzrlib/crash.py

bzrlib/doc_generate/sphinx_conf.py

bzrlib/help_topics/en/location-alias.txt

bzrlib/help_topics/es

bzrlib/plugins/launchpad/lp_api.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/test_lp_api.py

bzrlib/plugins/news_merge

bzrlib/plugins/news_merge/README

bzrlib/plugins/news_merge/__init__.py

bzrlib/plugins/news_merge/news_merge.py

bzrlib/plugins/news_merge/parser.py

bzrlib/plugins/news_merge/tests

bzrlib/plugins/news_merge/tests/__init__.py

bzrlib/plugins/news_merge/tests/test_news_merge.py

bzrlib/static_tuple.py

bzrlib/tests/blackbox/test_deleted.py

bzrlib/tests/blackbox/test_lookup_revision.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/commands/test_revert.py

bzrlib/tests/features.py

bzrlib/tests/per_bzrdir_colo

bzrlib/tests/per_bzrdir_colo/__init__.py

bzrlib/tests/per_bzrdir_colo/test_supported.py

bzrlib/tests/per_bzrdir_colo/test_unsupported.py

bzrlib/tests/per_foreign_vcs

bzrlib/tests/per_foreign_vcs/__init__.py

bzrlib/tests/per_foreign_vcs/test_branch.py

bzrlib/tests/per_foreign_vcs/test_repository.py

bzrlib/tests/per_merger.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_uifactory

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/script.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test__static_tuple.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_cmdline.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_patches_data/binary-after-normal.patch

bzrlib/tests/test_patches_data/binary.patch

bzrlib/tests/test_script.py

bzrlib/tests/test_server.py

bzrlib/transport/pathfilter.py

contrib/debian

contrib/debian/default

contrib/debian/init.d

doc/Bazaar-Logo-For-Manuals.png

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr-doc.css

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/apport.txt

doc/developers/check.txt

doc/developers/conf.py

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/implementation-notes.txt

doc/developers/index-plain.txt

doc/developers/miscellaneous-notes.txt

doc/developers/plans.txt

doc/developers/principles.txt

doc/developers/specifications.txt

doc/developers/win32_build_setup.txt

doc/en/Makefile

doc/en/_static

doc/en/_static/bzr icon 16.png

doc/en/_static/bzr.ico

doc/en/_static/en

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/admin-guide/advanced.txt

doc/en/admin-guide/backup.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/index-plain.txt

doc/en/admin-guide/integration.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/security.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/en/whats-new

doc/en/whats-new/whats-new-in-2.1.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/quick-reference/index.txt

doc/es/user-guide/index-plain.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/ja

doc/ja/_static

doc/ja/_static/bzr icon 16.png

doc/ja/_static/bzr.ico

doc/ja/_templates

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/index.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide

doc/ja/upgrade-guide/data_migration.txt

doc/ja/upgrade-guide/index.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/upgrade-guide/tips_and_tricks.txt

doc/ja/user-guide

doc/ja/user-guide/adv_merging.txt

doc/ja/user-guide/annotating_changes.txt

doc/ja/user-guide/bazaar_workflows.txt

doc/ja/user-guide/branching_a_project.txt

doc/ja/user-guide/browsing_history.txt

doc/ja/user-guide/bug_trackers.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/central_intro.txt

doc/ja/user-guide/configuring_bazaar.txt

doc/ja/user-guide/controlling_registration.txt

doc/ja/user-guide/core_concepts.txt

doc/ja/user-guide/distributed_intro.txt

doc/ja/user-guide/entering_commands.txt

doc/ja/user-guide/filtered_views.txt

doc/ja/user-guide/getting_help.txt

doc/ja/user-guide/hooks.txt

doc/ja/user-guide/http_smart_server.txt

doc/ja/user-guide/images

doc/ja/user-guide/images/workflows_centralized.png

doc/ja/user-guide/images/workflows_centralized.svg

doc/ja/user-guide/images/workflows_gatekeeper.png

doc/ja/user-guide/images/workflows_gatekeeper.svg

doc/ja/user-guide/images/workflows_localcommit.png

doc/ja/user-guide/images/workflows_localcommit.svg

doc/ja/user-guide/images/workflows_peer.png

doc/ja/user-guide/images/workflows_peer.svg

doc/ja/user-guide/images/workflows_pqm.png

doc/ja/user-guide/images/workflows_pqm.svg

doc/ja/user-guide/images/workflows_shared.png

doc/ja/user-guide/images/workflows_shared.svg

doc/ja/user-guide/images/workflows_single.png

doc/ja/user-guide/images/workflows_single.svg

doc/ja/user-guide/index.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/merging_changes.txt

doc/ja/user-guide/organizing_branches.txt

doc/ja/user-guide/organizing_your_workspace.txt

doc/ja/user-guide/part2_intro.txt

doc/ja/user-guide/partner_intro.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/publishing_a_branch.txt

doc/ja/user-guide/recording_changes.txt

doc/ja/user-guide/releasing_a_project.txt

doc/ja/user-guide/resolving_conflicts.txt

doc/ja/user-guide/reusing_a_checkout.txt

doc/ja/user-guide/reviewing_changes.txt

doc/ja/user-guide/sending_changes.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/setting_up_email.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/shelving_changes.txt

doc/ja/user-guide/solo_intro.txt

doc/ja/user-guide/specifying_revisions.txt

doc/ja/user-guide/stacked.txt

doc/ja/user-guide/starting_a_project.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/undoing_mistakes.txt

doc/ja/user-guide/using_aliases.txt

doc/ja/user-guide/using_checkouts.txt

doc/ja/user-guide/using_gatekeepers.txt

doc/ja/user-guide/version_info.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-guide/working_offline_central.txt

doc/ja/user-guide/writing_a_plugin.txt

doc/ja/user-guide/zen.txt

doc/ja/user-reference

doc/ja/user-reference/index.txt

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/quick-reference/index.txt

doc/ru/user-guide/index-plain.txt

tools/generate_release_notes.py

tools/package_docs.py

tools/packaging/update-control.sh

files removed:
bzrlib/textui.py

bzrlib/weave_commands.py

contrib/fortune

doc/bazaar-vcs.org.kid

doc/en/developer-guide

doc/en/user-guide/index.txt

doc/es/developer-guide

doc/es/release-notes

doc/es/user-reference

tools/biobench.py

tools/history2revfiles.py

files renamed:
bzrlib/help_topics/en/conflicts.txt => bzrlib/help_topics/en/conflict-types.txt

doc/es/user-guide/resolving_conflicts.txt => bzrlib/help_topics/es/conflict-types.txt

bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

bzrlib/tests/test_bencode.py => bzrlib/tests/test__bencode.py

doc/en/developer-guide/HACKING.txt => doc/developers/HACKING.txt

doc/en/quick-reference/Makefile => doc/en/_static/en/Makefile

doc/en/quick-reference/quick-start-summary.pdf => doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/quick-reference/quick-start-summary.png => doc/en/_static/en/bzr-en-quick-reference.png

doc/en/quick-reference/quick-start-summary.svg => doc/en/_static/en/bzr-en-quick-reference.svg

doc/es/quick-reference/Makefile => doc/es/_static/es/Makefile

doc/es/quick-reference/quick-start-summary.pdf => doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/quick-reference/quick-start-summary.png => doc/es/_static/es/bzr-es-quick-reference.png

doc/es/quick-reference/quick-start-summary.svg => doc/es/_static/es/bzr-es-quick-reference.svg

doc/index.es.txt => doc/es/index.txt

doc/ru/quick-reference/Makefile => doc/ru/_static/ru/Makefile

doc/ru/quick-reference/quick-start-summary.pdf => doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/quick-reference/quick-start-summary.png => doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/quick-reference/quick-start-summary.svg => doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/index.ru.txt => doc/ru/index.txt

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/bencode.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/foreign.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/inventory_delta.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/treeshape.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/_bencode_py.py

bzrlib/version.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

doc/default.css

doc/developers/add.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/bug-handling.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/update.txt

doc/en/admin-guide/index.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/es/mini-tutorial/index.txt

doc/es/user-guide/index.txt

doc/es/user-guide/version_info.txt

doc/index.txt

doc/ru/mini-tutorial/index.txt

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/zen.txt

profile_imports.py

setup.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/win32/build_release.py

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/info.txt

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

knit,

osutils,

pack,

static_tuple,

trace,

)

from bzrlib.graph import Graph

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

VersionedFiles,

)

# Minimum number of uncompressed bytes to try fetch at once when retrieving

# groupcompress blocks.

BATCH_SIZE = 2**16

_USE_LZMA = False and (pylzma is not None)

# osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):

"""Sort and group the keys in parent_map into groupcompress order.

# groupcompress ordering is approximately reverse topological,

# properly grouped by file-id.

per_prefix_map = {}

for item in parent_map.iteritems():

key = item[0]

for key, value in parent_map.iteritems():

if isinstance(key, str) or len(key) == 1:

prefix = ''

else:

prefix = key[0]

try:

per_prefix_map[prefix].append(item)

per_prefix_map[prefix][key] = value

except KeyError:

per_prefix_map[prefix] = [item]

per_prefix_map[prefix] = {key: value}

present_keys = []

for prefix in sorted(per_prefix_map):

118

120

:param num_bytes: Ensure that we have extracted at least num_bytes of

119

121

content. If None, consume everything

120

122

"""

121

# TODO: If we re-use the same content block at different times during

122

# get_record_stream(), it is possible that the first pass will

123

# get inserted, triggering an extract/_ensure_content() which

124

# will get rid of _z_content. And then the next use of the block

125

# will try to access _z_content (to send it over the wire), and

126

# fail because it is already extracted. Consider never releasing

127

# _z_content because of this.

123

if self._content_length is None:

124

raise AssertionError('self._content_length should never be None')

128

125

if num_bytes is None:

129

126

num_bytes = self._content_length

130

127

elif (self._content_length is not None

147

144

self._content = pylzma.decompress(self._z_content)

148

145

elif self._compressor_name == 'zlib':

149

146

# Start a zlib decompressor

150

if num_bytes is None:

147

if num_bytes * 4 > self._content_length * 3:

148

# If we are requesting more than 3/4ths of the content,

149

# just extract the whole thing in a single pass

150

num_bytes = self._content_length

151

self._content = zlib.decompress(self._z_content)

152

else:

153

self._z_content_decompressor = zlib.decompressobj()

155

# that the rest of the code is simplified

156

self._content = self._z_content_decompressor.decompress(

157

self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

158

if not self._z_content_decompressor.unconsumed_tail:

159

self._z_content_decompressor = None

158

160

else:

159

161

raise AssertionError('Unknown compressor: %r'

160

162

% self._compressor_name)

162

164

# 'unconsumed_tail'

163

165

164

166

# Do we have enough bytes already?

165

if num_bytes is not None and len(self._content) >= num_bytes:

166

return

167

if num_bytes is None and self._z_content_decompressor is None:

168

# We must have already decompressed everything

167

if len(self._content) >= num_bytes:

169

168

return

170

169

# If we got this far, and don't have a decompressor, something is wrong

171

170

if self._z_content_decompressor is None:

172

171

raise AssertionError(

173

172

'No decompressor to decompress %d bytes' % num_bytes)

174

173

remaining_decomp = self._z_content_decompressor.unconsumed_tail

175

if num_bytes is None:

176

if remaining_decomp:

177

# We don't know how much is left, but we'll decompress it all

178

self._content += self._z_content_decompressor.decompress(

179

remaining_decomp)

180

# Note: There's what I consider a bug in zlib.decompressobj

181

# If you pass back in the entire unconsumed_tail, only

182

# this time you don't pass a max-size, it doesn't

183

# change the unconsumed_tail back to None/''.

184

# However, we know we are done with the whole stream

185

self._z_content_decompressor = None

186

# XXX: Why is this the only place in this routine we set this?

187

self._content_length = len(self._content)

188

else:

189

if not remaining_decomp:

190

raise AssertionError('Nothing left to decompress')

191

needed_bytes = num_bytes - len(self._content)

192

# We always set max_size to 32kB over the minimum needed, so that

193

# zlib will give us as much as we really want.

194

# TODO: If this isn't good enough, we could make a loop here,

195

# that keeps expanding the request until we get enough

196

self._content += self._z_content_decompressor.decompress(

197

remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)

198

if len(self._content) < num_bytes:

199

raise AssertionError('%d bytes wanted, only %d available'

200

% (num_bytes, len(self._content)))

201

if not self._z_content_decompressor.unconsumed_tail:

202

# The stream is finished

203

self._z_content_decompressor = None

174

if not remaining_decomp:

175

raise AssertionError('Nothing left to decompress')

176

needed_bytes = num_bytes - len(self._content)

177

# We always set max_size to 32kB over the minimum needed, so that

178

# zlib will give us as much as we really want.

179

# TODO: If this isn't good enough, we could make a loop here,

180

# that keeps expanding the request until we get enough

181

self._content += self._z_content_decompressor.decompress(

182

remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)

183

if len(self._content) < num_bytes:

184

raise AssertionError('%d bytes wanted, only %d available'

185

% (num_bytes, len(self._content)))

186

if not self._z_content_decompressor.unconsumed_tail:

187

# The stream is finished

188

self._z_content_decompressor = None

204

189

205

190

def _parse_bytes(self, bytes, pos):

206

191

"""Read the various lengths from the header.

456

441

# There are code paths that first extract as fulltext, and then

457

442

# extract as storage_kind (smart fetch). So we don't break the

458

443

# refcycle here, but instead in manager.get_record_stream()

459

# self._manager = None

460

444

if storage_kind == 'fulltext':

461

445

return self._bytes

462

446

else:

468

452

class _LazyGroupContentManager(object):

469

453

"""This manages a group of _LazyGroupCompressFactory objects."""

470

454

455

_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of

456

# current size, and still be considered

457

# reusable

458

_full_block_size = 4*1024*1024

459

_full_mixed_block_size = 2*1024*1024

460

_full_enough_block_size = 3*1024*1024 # size at which we won't repack

461

_full_enough_mixed_block_size = 2*768*1024 # 1.5MB

462

471

463

def __init__(self, block):

472

464

self._block = block

473

465

# We need to preserve the ordering

545

537

# time (self._block._content) is a little expensive.

546

538

self._block._ensure_content(self._last_byte)

547

539

548

def _check_rebuild_block(self):

540

def _check_rebuild_action(self):

549

541

"""Check to see if our block should be repacked."""

550

542

total_bytes_used = 0

551

543

last_byte_used = 0

552

544

for factory in self._factories:

553

545

total_bytes_used += factory._end - factory._start

554

last_byte_used = max(last_byte_used, factory._end)

555

# If we are using most of the bytes from the block, we have nothing

556

# else to check (currently more than 1/2)

546

if last_byte_used < factory._end:

547

last_byte_used = factory._end

548

# If we are using more than half of the bytes from the block, we have

549

# nothing else to check

557

550

if total_bytes_used * 2 >= self._block._content_length:

558

return

559

# Can we just strip off the trailing bytes? If we are going to be

560

# transmitting more than 50% of the front of the content, go ahead

551

return None, last_byte_used, total_bytes_used

552

# We are using less than 50% of the content. Is the content we are

553

# using at the beginning of the block? If so, we can just trim the

554

# tail, rather than rebuilding from scratch.

561

555

if total_bytes_used * 2 > last_byte_used:

562

self._trim_block(last_byte_used)

563

return

556

return 'trim', last_byte_used, total_bytes_used

564

557

565

558

# We are using a small amount of the data, and it isn't just packed

566

559

# nicely at the front, so rebuild the content.

573

566

# expanding many deltas into fulltexts, as well.

574

567

# If we build a cheap enough 'strip', then we could try a strip,

575

568

# if that expands the content, we then rebuild.

576

self._rebuild_block()

569

return 'rebuild', last_byte_used, total_bytes_used

570

571

def check_is_well_utilized(self):

572

"""Is the current block considered 'well utilized'?

573

574

This heuristic asks if the current block considers itself to be a fully

575

developed group, rather than just a loose collection of data.

576

"""

577

if len(self._factories) == 1:

578

# A block of length 1 could be improved by combining with other

579

# groups - don't look deeper. Even larger than max size groups

580

# could compress well with adjacent versions of the same thing.

581

return False

582

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

583

block_size = self._block._content_length

584

if total_bytes_used < block_size * self._max_cut_fraction:

585

# This block wants to trim itself small enough that we want to

586

# consider it under-utilized.

587

return False

588

# TODO: This code is meant to be the twin of _insert_record_stream's

589

# 'start_new_block' logic. It would probably be better to factor

590

# out that logic into a shared location, so that it stays

591

# together better

592

# We currently assume a block is properly utilized whenever it is >75%

593

# of the size of a 'full' block. In normal operation, a block is

594

# considered full when it hits 4MB of same-file content. So any block

595

# >3MB is 'full enough'.

596

# The only time this isn't true is when a given block has large-object

597

# content. (a single file >4MB, etc.)

598

# Under these circumstances, we allow a block to grow to

599

# 2 x largest_content. Which means that if a given block had a large

600

# object, it may actually be under-utilized. However, given that this

601

# is 'pack-on-the-fly' it is probably reasonable to not repack large

602

# content blobs on-the-fly. Note that because we return False for all

603

# 1-item blobs, we will repack them; we may wish to reevaluate our

604

# treatment of large object blobs in the future.

605

if block_size >= self._full_enough_block_size:

606

return True

607

# If a block is <3MB, it still may be considered 'full' if it contains

608

# mixed content. The current rule is 2MB of mixed content is considered

609

# full. So check to see if this block contains mixed content, and

610

# set the threshold appropriately.

611

common_prefix = None

612

for factory in self._factories:

613

prefix = factory.key[:-1]

614

if common_prefix is None:

615

common_prefix = prefix

616

elif prefix != common_prefix:

617

# Mixed content, check the size appropriately

618

if block_size >= self._full_enough_mixed_block_size:

619

return True

620

break

621

# The content failed both the mixed check and the single-content check

622

# so obviously it is not fully utilized

623

# TODO: there is one other constraint that isn't being checked

624

# namely, that the entries in the block are in the appropriate

625

# order. For example, you could insert the entries in exactly

626

# reverse groupcompress order, and we would think that is ok.

627

# (all the right objects are in one group, and it is fully

628

# utilized, etc.) For now, we assume that case is rare,

629

# especially since we should always fetch in 'groupcompress'

630

# order.

631

return False

632

633

def _check_rebuild_block(self):

634

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

635

if action is None:

636

return

637

if action == 'trim':

638

self._trim_block(last_byte_used)

639

elif action == 'rebuild':

640

self._rebuild_block()

641

else:

642

raise ValueError('unknown rebuild action: %r' % (action,))

577

643

578

644

def _wire_bytes(self):

579

645

"""Return a byte stream suitable for transmitting over the wire."""

975

1041

versioned_files.stream.close()

976

1042

977

1043

1044

class _BatchingBlockFetcher(object):

1045

"""Fetch group compress blocks in batches.

1046

1047

:ivar total_bytes: int of expected number of bytes needed to fetch the

1048

currently pending batch.

1049

"""

1050

1051

def __init__(self, gcvf, locations):

1052

self.gcvf = gcvf

1053

self.locations = locations

1054

self.keys = []

1055

self.batch_memos = {}

1056

self.memos_to_get = []

1057

self.total_bytes = 0

1058

self.last_read_memo = None

1059

self.manager = None

1060

1061

def add_key(self, key):

1062

"""Add another key to fetch.

1063

1064

:return: The estimated number of bytes needed to fetch the batch so

1065

far.

1066

"""

1067

self.keys.append(key)

1068

index_memo, _, _, _ = self.locations[key]

1069

read_memo = index_memo[0:3]

1070

# Three possibilities for this read_memo:

1071

# - it's already part of this batch; or

1072

# - it's not yet part of this batch, but is already cached; or

1073

# - it's not yet part of this batch and will need to be fetched.

1074

if read_memo in self.batch_memos:

1075

# This read memo is already in this batch.

1076

return self.total_bytes

1077

try:

1078

cached_block = self.gcvf._group_cache[read_memo]

1079

except KeyError:

1080

# This read memo is new to this batch, and the data isn't cached

1081

# either.

1082

self.batch_memos[read_memo] = None

1083

self.memos_to_get.append(read_memo)

1084

byte_length = read_memo[2]

1085

self.total_bytes += byte_length

1086

else:

1087

# This read memo is new to this batch, but cached.

1088

# Keep a reference to the cached block in batch_memos because it's

1089

# certain that we'll use it when this batch is processed, but

1090

# there's a risk that it would fall out of _group_cache between now

1091

# and then.

1092

self.batch_memos[read_memo] = cached_block

1093

return self.total_bytes

1094

1095

def _flush_manager(self):

1096

if self.manager is not None:

1097

for factory in self.manager.get_record_stream():

1098

yield factory

1099

self.manager = None

1100

self.last_read_memo = None

1101

1102

def yield_factories(self, full_flush=False):

1103

"""Yield factories for keys added since the last yield. They will be

1104

returned in the order they were added via add_key.

1105

1106

:param full_flush: by default, some results may not be returned in case

1107

they can be part of the next batch. If full_flush is True, then

1108

all results are returned.

1109

"""

1110

if self.manager is None and not self.keys:

1111

return

1112

# Fetch all memos in this batch.

1113

blocks = self.gcvf._get_blocks(self.memos_to_get)

1114

# Turn blocks into factories and yield them.

1115

memos_to_get_stack = list(self.memos_to_get)

1116

memos_to_get_stack.reverse()

1117

for key in self.keys:

1118

index_memo, _, parents, _ = self.locations[key]

1119

read_memo = index_memo[:3]

1120

if self.last_read_memo != read_memo:

1121

# We are starting a new block. If we have a

1122

# manager, we have found everything that fits for

1123

# now, so yield records

1124

for factory in self._flush_manager():

1125

yield factory

1126

# Now start a new manager.

1127

if memos_to_get_stack and memos_to_get_stack[-1] == read_memo:

1128

# The next block from _get_blocks will be the block we

1129

# need.

1130

block_read_memo, block = blocks.next()

1131

if block_read_memo != read_memo:

1132

raise AssertionError(

1133

"block_read_memo out of sync with read_memo"

1134

"(%r != %r)" % (block_read_memo, read_memo))

1135

self.batch_memos[read_memo] = block

1136

memos_to_get_stack.pop()

1137

else:

1138

block = self.batch_memos[read_memo]

1139

self.manager = _LazyGroupContentManager(block)

1140

self.last_read_memo = read_memo

1141

start, end = index_memo[3:5]

1142

self.manager.add_factory(key, parents, start, end)

1143

if full_flush:

1144

for factory in self._flush_manager():

1145

yield factory

1146

del self.keys[:]

1147

self.batch_memos.clear()

1148

del self.memos_to_get[:]

1149

self.total_bytes = 0

1150

1151

978

1152

class GroupCompressVersionedFiles(VersionedFiles):

979

1153

"""A group-compress based VersionedFiles implementation."""

980

1154

981

def __init__(self, index, access, delta=True):

1155

def __init__(self, index, access, delta=True, _unadded_refs=None):

982

1156

"""Create a GroupCompressVersionedFiles object.

983

1157

984

1158

:param index: The index object storing access and graph data.

985

1159

:param access: The access object storing raw data.

986

1160

:param delta: Whether to delta compress or just entropy compress.

1161

:param _unadded_refs: private parameter, don't use.

987

1162

"""

988

1163

self._index = index

989

1164

self._access = access

990

1165

self._delta = delta

991

self._unadded_refs = {}

1166

if _unadded_refs is None:

1167

_unadded_refs = {}

1168

self._unadded_refs = _unadded_refs

992

1169

self._group_cache = LRUSizeCache(max_size=50*1024*1024)

993

1170

self._fallback_vfs = []

994

1171

1172

def without_fallbacks(self):

1173

"""Return a clone of this object without any fallbacks configured."""

1174

return GroupCompressVersionedFiles(self._index, self._access,

1175

self._delta, _unadded_refs=dict(self._unadded_refs))

1176

995

1177

def add_lines(self, key, parents, lines, parent_texts=None,

996

1178

left_matching_blocks=None, nostore_sha=None, random_id=False,

997

1179

check_content=True):

1075

1257

def get_annotator(self):

1076

1258

return annotate.Annotator(self)

1077

1259

1078

def check(self, progress_bar=None):

1260

def check(self, progress_bar=None, keys=None):

1079

1261

"""See VersionedFiles.check()."""

1080

keys = self.keys()

1081

for record in self.get_record_stream(keys, 'unordered', True):

1082

record.get_bytes_as('fulltext')

1262

if keys is None:

1263

keys = self.keys()

1264

for record in self.get_record_stream(keys, 'unordered', True):

1265

record.get_bytes_as('fulltext')

1266

else:

1267

return self.get_record_stream(keys, 'unordered', True)

1268

1269

def clear_cache(self):

1270

"""See VersionedFiles.clear_cache()"""

1271

self._group_cache.clear()

1272

self._index._graph_index.clear_cache()

1273

self._index._int_cache.clear()

1083

1274

1084

1275

def _check_add(self, key, lines, random_id, check_content):

1085

1276

"""check that version_id and lines are safe to add."""

1096

1287

self._check_lines_not_unicode(lines)

1097

1288

self._check_lines_are_lines(lines)

1098

1289

1290

def get_known_graph_ancestry(self, keys):

1291

"""Get a KnownGraph instance with the ancestry of keys."""

1292

# Note that this is identical to

1293

# KnitVersionedFiles.get_known_graph_ancestry, but they don't share

1294

# ancestry.

1295

parent_map, missing_keys = self._index.find_ancestry(keys)

1296

for fallback in self._fallback_vfs:

1297

if not missing_keys:

1298

break

1299

(f_parent_map, f_missing_keys) = fallback._index.find_ancestry(

1300

missing_keys)

1301

parent_map.update(f_parent_map)

1302

missing_keys = f_missing_keys

1303

kg = _mod_graph.KnownGraph(parent_map)

1304

return kg

1305

1099

1306

def get_parent_map(self, keys):

1100

1307

"""Get a map of the graph parents of keys.

1101

1308

1128

1335

missing.difference_update(set(new_result))

1129

1336

return result, source_results

1130

1337

1131

def _get_block(self, index_memo):

1132

read_memo = index_memo[0:3]

1133

# get the group:

1134

try:

1135

block = self._group_cache[read_memo]

1136

except KeyError:

1137

# read the group

1138

zdata = self._access.get_raw_records([read_memo]).next()

1139

# decompress - whole thing - this is not a bug, as it

1140

# permits caching. We might want to store the partially

1141

# decompressed group and decompress object, so that recent

1142

# texts are not penalised by big groups.

1143

block = GroupCompressBlock.from_bytes(zdata)

1144

self._group_cache[read_memo] = block

1145

# cheapo debugging:

1146

# print len(zdata), len(plain)

1147

# parse - requires split_lines, better to have byte offsets

1148

# here (but not by much - we only split the region for the

1149

# recipe, and we often want to end up with lines anyway.

1150

return block

1338

def _get_blocks(self, read_memos):

1339

"""Get GroupCompressBlocks for the given read_memos.

1340

1341

:returns: a series of (read_memo, block) pairs, in the order they were

1342

originally passed.

1343

"""

1344

cached = {}

1345

for read_memo in read_memos:

1346

try:

1347

block = self._group_cache[read_memo]

1348

except KeyError:

1349

pass

1350

else:

1351

cached[read_memo] = block

1352

not_cached = []

1353

not_cached_seen = set()

1354

for read_memo in read_memos:

1355

if read_memo in cached:

1356

# Don't fetch what we already have

1357

continue

1358

if read_memo in not_cached_seen:

1359

# Don't try to fetch the same data twice

1360

continue

1361

not_cached.append(read_memo)

1362

not_cached_seen.add(read_memo)

1363

raw_records = self._access.get_raw_records(not_cached)

1364

for read_memo in read_memos:

1365

try:

1366

yield read_memo, cached[read_memo]

1367

except KeyError:

1368

# Read the block, and cache it.

1369

zdata = raw_records.next()

1370

block = GroupCompressBlock.from_bytes(zdata)

1371

self._group_cache[read_memo] = block

1372

cached[read_memo] = block

1373

yield read_memo, block

1151

1374

1152

1375

def get_missing_compression_parent_keys(self):

1153

1376

"""Return the keys of missing compression parents.

1319

1542

unadded_keys, source_result)

1320

1543

for key in missing:

1321

1544

yield AbsentContentFactory(key)

1322

manager = None

1323

last_read_memo = None

1324

# TODO: This works fairly well at batching up existing groups into a

1325

# streamable format, and possibly allowing for taking one big

1326

# group and splitting it when it isn't fully utilized.

1327

# However, it doesn't allow us to find under-utilized groups and

1328

# combine them into a bigger group on the fly.

1329

# (Consider the issue with how chk_map inserts texts

1330

# one-at-a-time.) This could be done at insert_record_stream()

1331

# time, but it probably would decrease the number of

1332

# bytes-on-the-wire for fetch.

1545

# Batch up as many keys as we can until either:

1546

# - we encounter an unadded ref, or

1547

# - we run out of keys, or

1548

# - the total bytes to retrieve for this batch > BATCH_SIZE

1549

batcher = _BatchingBlockFetcher(self, locations)

1333

1550

for source, keys in source_keys:

1334

1551

if source is self:

1335

1552

for key in keys:

1336

1553

if key in self._unadded_refs:

1337

if manager is not None:

1338

for factory in manager.get_record_stream():

1339

yield factory

1340

last_read_memo = manager = None

1554

# Flush batch, then yield unadded ref from

1555

# self._compressor.

1556

for factory in batcher.yield_factories(full_flush=True):

1557

yield factory

1341

1558

bytes, sha1 = self._compressor.extract(key)

1342

1559

parents = self._unadded_refs[key]

1343

1560

yield FulltextContentFactory(key, parents, sha1, bytes)

1344

else:

1345

index_memo, _, parents, (method, _) = locations[key]

1346

read_memo = index_memo[0:3]

1347

if last_read_memo != read_memo:

1348

# We are starting a new block. If we have a

1349

# manager, we have found everything that fits for

1350

# now, so yield records

1351

if manager is not None:

1352

for factory in manager.get_record_stream():

1353

yield factory

1354

# Now start a new manager

1355

block = self._get_block(index_memo)

1356

manager = _LazyGroupContentManager(block)

1357

last_read_memo = read_memo

1358

start, end = index_memo[3:5]

1359

manager.add_factory(key, parents, start, end)

1561

continue

1562

if batcher.add_key(key) > BATCH_SIZE:

1563

# Ok, this batch is big enough. Yield some results.

1564

for factory in batcher.yield_factories():

1565

yield factory

1360

1566

else:

1361

if manager is not None:

1362

for factory in manager.get_record_stream():

1363

yield factory

1364

last_read_memo = manager = None

1567

for factory in batcher.yield_factories(full_flush=True):

1568

yield factory

1365

1569

for record in source.get_record_stream(keys, ordering,

1366

1570

include_delta_closure):

1367

1571

yield record

1368

if manager is not None:

1369

for factory in manager.get_record_stream():

1370

yield factory

1572

for factory in batcher.yield_factories(full_flush=True):

1573

yield factory

1371

1574

1372

1575

def get_sha1s(self, keys):

1373

1576

"""See VersionedFiles.get_sha1s()."""

1428

1631

keys_to_add = []

1429

1632

def flush():

1430

1633

bytes = self._compressor.flush().to_bytes()

1634

self._compressor = GroupCompressor()

1431

1635

index, start, length = self._access.add_raw_records(

1432

1636

[(None, len(bytes))], bytes)[0]

1433

1637

nodes = []

1436

1640

self._index.add_records(nodes, random_id=random_id)

1437

1641

self._unadded_refs = {}

1438

1642

del keys_to_add[:]

1439

self._compressor = GroupCompressor()

1440

1643

1441

1644

last_prefix = None

1442

1645

max_fulltext_len = 0

1446

1649

block_length = None

1447

1650

# XXX: TODO: remove this, it is just for safety checking for now

1448

1651

inserted_keys = set()

1652

reuse_this_block = reuse_blocks

1449

1653

for record in stream:

1450

1654

# Raise an error when a record is missing.

1451

1655

if record.storage_kind == 'absent':

1459

1663

if reuse_blocks:

1460

1664

# If the reuse_blocks flag is set, check to see if we can just

1461

1665

# copy a groupcompress block as-is.

1666

# We only check on the first record (groupcompress-block) not

1667

# on all of the (groupcompress-block-ref) entries.

1668

# The reuse_this_block flag is then kept for as long as

1669

if record.storage_kind == 'groupcompress-block':

1670

# Check to see if we really want to re-use this block

1671

insert_manager = record._manager

1672

reuse_this_block = insert_manager.check_is_well_utilized()

1673

else:

1674

reuse_this_block = False

1675

if reuse_this_block:

1676

# We still want to reuse this block

1462

1677

if record.storage_kind == 'groupcompress-block':

1463

1678

# Insert the raw block into the target repo

1464

1679

insert_manager = record._manager

1465

insert_manager._check_rebuild_block()

1466

1680

bytes = record._manager._block.to_bytes()

1467

1681

_, start, length = self._access.add_raw_records(

1468

1682

[(None, len(bytes))], bytes)[0]

1473

1687

'groupcompress-block-ref'):

1474

1688

if insert_manager is None:

1475

1689

raise AssertionError('No insert_manager set')

1690

if insert_manager is not record._manager:

1691

raise AssertionError('insert_manager does not match'

1692

' the current record, we cannot be positive'

1693

' that the appropriate content was inserted.'

1694

)

1476

1695

value = "%d %d %d %d" % (block_start, block_length,

1477

1696

record._start, record._end)

1478

1697

nodes = [(record.key, value, (record.parents,))]

1528

1747

key = record.key

1529

1748

self._unadded_refs[key] = record.parents

1530

1749

yield found_sha1

1531

keys_to_add.append((key, '%d %d' % (start_point, end_point),

1532

(record.parents,)))

1750

as_st = static_tuple.StaticTuple.from_sequence

1751

if record.parents is not None:

1752

parents = as_st([as_st(p) for p in record.parents])

1753

else:

1754

parents = None

1755

refs = static_tuple.StaticTuple(parents)

1756

keys_to_add.append((key, '%d %d' % (start_point, end_point), refs))

1533

1757

if len(keys_to_add):

1534

1758

flush()

1535

1759

self._compressor = None

1590

1814

1591

1815

def __init__(self, graph_index, is_locked, parents=True,

1592

1816

add_callback=None, track_external_parent_refs=False,

1593

inconsistency_fatal=True):

1817

inconsistency_fatal=True, track_new_keys=False):

1594

1818

"""Construct a _GCGraphIndex on a graph_index.

1595

1819

1596

1820

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1615

1839

self.has_graph = parents

1616

1840

self._is_locked = is_locked

1617

1841

self._inconsistency_fatal = inconsistency_fatal

1842

# GroupCompress records tend to have the same 'group' start + offset

1843

# repeated over and over, this creates a surplus of ints

1844

self._int_cache = {}

1618

1845

if track_external_parent_refs:

1619

self._key_dependencies = knit._KeyRefs()

1846

self._key_dependencies = knit._KeyRefs(

1847

track_new_keys=track_new_keys)

1620

1848

else:

1621

1849

self._key_dependencies = None

1622

1850

1655

1883

if not random_id:

1656

1884

present_nodes = self._get_entries(keys)

1657

1885

for (index, key, value, node_refs) in present_nodes:

1658

if node_refs != keys[key][1]:

1659

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1886

# Sometimes these are passed as a list rather than a tuple

1887

node_refs = static_tuple.as_tuples(node_refs)

1888

passed = static_tuple.as_tuples(keys[key])

1889

if node_refs != passed[1]:

1890

details = '%s %s %s' % (key, (value, node_refs), passed)

1660

1891

if self._inconsistency_fatal:

1661

1892

raise errors.KnitCorrupt(self, "inconsistent details"

1662

1893

" in add_records: %s" %

1676

1907

result.append((key, value))

1677

1908

records = result

1678

1909

key_dependencies = self._key_dependencies

1679

if key_dependencies is not None and self._parents:

1680

for key, value, refs in records:

1681

parents = refs[0]

1682

key_dependencies.add_references(key, parents)

1910

if key_dependencies is not None:

1911

if self._parents:

1912

for key, value, refs in records:

1913

parents = refs[0]

1914

key_dependencies.add_references(key, parents)

1915

else:

1916

for key, value, refs in records:

1917

new_keys.add_key(key)

1683

1918

self._add_callback(records)

1684

1919

1685

1920

def _check_read(self):

1716

1951

if missing_keys:

1717

1952

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1718

1953

1954

def find_ancestry(self, keys):

1955

"""See CombinedGraphIndex.find_ancestry"""

1956

return self._graph_index.find_ancestry(keys, 0)

1957

1719

1958

def get_parent_map(self, keys):

1720

1959

"""Get a map of the parents of keys.

1721

1960

1738

1977

"""Return the keys of missing parents."""

1739

1978

# Copied from _KnitGraphIndex.get_missing_parents

1740

1979

# We may have false positives, so filter those out.

1741

self._key_dependencies.add_keys(

1980

self._key_dependencies.satisfy_refs_for_keys(

1742

1981

self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))

1743

1982

return frozenset(self._key_dependencies.get_unsatisfied_refs())

1744

1983

1787

2026

"""Convert an index value to position details."""

1788

2027

bits = node[2].split(' ')

1789

2028

# It would be nice not to read the entire gzip.

2029

# start and stop are put into _int_cache because they are very common.

2030

# They define the 'group' that an entry is in, and many groups can have

2031

# thousands of objects.

2032

# Branching Launchpad, for example, saves ~600k integers, at 12 bytes

2033

# each, or about 7MB. Note that it might be even more when you consider

2034

# how PyInt is allocated in separate slabs. And you can't return a slab

2035

# to the OS if even 1 int on it is in use. Note though that Python uses

2036

# a LIFO when re-using PyInt slots, which probably causes more

2037

# fragmentation.

1790

2038

start = int(bits[0])

2039

start = self._int_cache.setdefault(start, start)

1791

2040

stop = int(bits[1])

2041

stop = self._int_cache.setdefault(stop, stop)

1792

2042

basis_end = int(bits[2])

1793

2043

delta_end = int(bits[3])

1794

return node[0], start, stop, basis_end, delta_end

2044

# We can't use StaticTuple here, because node[0] is a BTreeGraphIndex

2045

# instance...

2046

return (node[0], start, stop, basis_end, delta_end)

1795

2047

1796

2048

def scan_unvalidated_index(self, graph_index):

1797

2049

"""Inform this _GCGraphIndex that there is an unvalidated index.

1798

2050

1799

2051

This allows this _GCGraphIndex to keep track of any missing

1800

2052

compression parents we may want to have filled in to make those

1801

indices valid.

2053

indices valid. It also allows _GCGraphIndex to track any new keys.

1802

2054

1803

2055

:param graph_index: A GraphIndex

1804

2056

"""

1805

if self._key_dependencies is not None:

1806

# Add parent refs from graph_index (and discard parent refs that

1807

# the graph_index has).

1808

add_refs = self._key_dependencies.add_references

1809

for node in graph_index.iter_all_entries():

1810

add_refs(node[1], node[3][0])

1811

2057

key_dependencies = self._key_dependencies

2058

if key_dependencies is None:

2059

return

2060

for node in graph_index.iter_all_entries():

2061

# Add parent refs from graph_index (and discard parent refs

2062

# that the graph_index has).

2063

key_dependencies.add_references(node[1], node[3][0])

1812

2064

1813

2065

1814

2066

from bzrlib._groupcompress_py import (

1828

2080

decode_base128_int,

1829

2081

)

1830

2082

GroupCompressor = PyrexGroupCompressor

1831

except ImportError:

2083

except ImportError, e:

2084

osutils.failed_to_load_extension(e)

1832

2085

GroupCompressor = PythonGroupCompressor

1833

2086

Older »