~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: John Arbash Meinel
Author(s): Mark Hammond
Date: 2008-09-09 17:02:21 UTC
mto: This revision was merged to the branch mainline in revision 3697.
Revision ID: john@arbash-meinel.com-20080909170221-svim3jw2mrz0amp3

An updated transparent icon for bzr.

files added:
bzrlib/_walkdirs_win32.h

bzrlib/help_topics/en/hooks.txt

bzrlib/tests/test_http_implementations.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/tests/test_bencode.py

doc/developers/performance-contributing.txt

files removed:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/benchmarks/bench_tags.py

bzrlib/bencode.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/clean_tree.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/inventory_delta.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/python-compat.h

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/packrepository.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/fake_command.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/util/bencode.py

bzrlib/views.py

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/BUILD-NOTES

doc/Makefile

doc/_static

doc/_static/bzr icon 16.png

doc/_static/bzr.ico

doc/_static/en

doc/_static/en/quick-reference

doc/_templates

doc/_templates/index.html

doc/_templates/layout.html

doc/conf.py

doc/contents.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/colocated-branches.txt

doc/developers/cycle.txt

doc/developers/ec2.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/lca_tree_merging.txt

doc/developers/overview.txt

doc/en/migration

doc/en/migration/index.txt

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/index-for-2x.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/shelving_changes.txt

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

doc/index.ru.txt

doc/make.bat

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

tools/check-newsbugs.py

tools/packaging/lp-upload-release

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files renamed:
bzrlib/_btree_serializer_pyx.pyx => bzrlib/_btree_serializer_c.pyx

bzrlib/_dirstate_helpers_pyx.h => bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_pyx.pyx => bzrlib/_dirstate_helpers_c.pyx

bzrlib/_knit_load_data_pyx.pyx => bzrlib/_knit_load_data_c.pyx

bzrlib/tests/per_branch/ => bzrlib/tests/branch_implementations/

bzrlib/tests/per_bzrdir/ => bzrlib/tests/bzrdir_implementations/

bzrlib/tests/ftp_server/medusa_based.py => bzrlib/tests/ftp_server.py

bzrlib/tests/per_interrepository/ => bzrlib/tests/interrepository_implementations/

bzrlib/tests/per_intertree/ => bzrlib/tests/intertree_implementations/

bzrlib/tests/per_inventory/ => bzrlib/tests/inventory_implementations/

bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/tests/per_transport.py => bzrlib/tests/test_transport_implementations.py

bzrlib/tests/per_tree/ => bzrlib/tests/tree_implementations/

bzrlib/tests/per_workingtree/ => bzrlib/tests/workingtree_implementations/

bzrlib/util/_bencode_py.py => bzrlib/util/bencode.py

doc/en/developer-guide/HACKING.txt => doc/developers/HACKING.txt

doc/_static/en/quick-reference/Makefile => doc/en/quick-reference/Makefile

doc/_static/en/quick-reference/bzr-quick-reference.pdf => doc/en/quick-reference/quick-start-summary.pdf

doc/_static/en/quick-reference/bzr-quick-reference.png => doc/en/quick-reference/quick-start-summary.png

doc/_static/en/quick-reference/bzr-quick-reference.svg => doc/en/quick-reference/quick-start-summary.svg

doc/es/developer-guide/ => doc/es/guia-desarrollador/

doc/es/user-guide/ => doc/es/guia-usuario/

doc/es/release-notes/ => doc/es/notas-version/

doc/es/quick-reference/ => doc/es/referencia-rapida/

doc/es/quick-reference/quick-start-summary.svg => doc/es/referencia-rapida/referencia-rapida.svg

doc/es/user-reference/ => doc/es/referencia/

tools/generate_docs.py => generate_docs.py

bzrlib/doc_generate/ => tools/doc_generate/

files modified:
.bzrignore

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_add_fallback_repository.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib/bzr_access

contrib/newinventory.py

contrib/pwclient.full

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/network-protocol.txt

doc/developers/performance-roadmap.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/revision-properties.txt

doc/developers/testing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

doc/es/referencia-rapida/Makefile

doc/index.es.txt

doc/index.txt

profile_imports.py

setup.py

tools/bzr_epydoc_uid.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/package_mf.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/weavebench.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

trace,

)

from bzrlib.graph import Graph

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

_USE_LZMA = False and (pylzma is not None)

# osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map the way groupcompress wants them.

    Keys are bucketed by prefix (the first element of a multi-element key;
    plain strings and 1-tuples all share the '' bucket).  Buckets are visited
    in sorted prefix order and, inside each bucket, the keys come out in
    reverse topological order.

    :param parent_map: A mapping of key => parent keys.
    :return: A sorted-list of keys
    """
    buckets = {}
    for entry in parent_map.iteritems():
        label = entry[0]
        if isinstance(label, str) or len(label) == 1:
            bucket_name = ''
        else:
            bucket_name = label[0]
        # Each bucket keeps the full (key, parents) pairs, because that is
        # what topo_sort consumes.
        buckets.setdefault(bucket_name, []).append(entry)

    ordered_keys = []
    for bucket_name in sorted(buckets):
        newest_first = reversed(topo_sort(buckets[bucket_name]))
        ordered_keys.extend(newest_first)
    return ordered_keys

# The max zlib window size is 32kB, so if we set 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should guarantee

# num_bytes coming out.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The payload can be held in three forms, any of which may be None at a
    given time: a list of uncompressed chunks (_content_chunks), one
    uncompressed string (_content) and the compressed string (_z_content).
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        """Create an empty block; every field starts out as None."""
        # map by key? or just order in file?
        self._compressor_name = None
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None
        self._content_chunks = None

    def __len__(self):
        """Return the uncompressed length plus the compressed length."""
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: if more bytes are requested than the block
            holds, if there is nothing to expand from, if the compressor name
            is not recognised, or if decompression yields too few bytes.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            # May still be None afterwards when the length is not known yet.
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None and self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The serialised block.
        :param pos: Offset in bytes of the first length field.
        :return: The position in bytes just after the last newline
        :raises AssertionError: if the remaining bytes do not match the
            compressed length recorded in the header.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]
        # The docstring has always promised this value; hand it back.
        return pos

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string starting with one of GCB_KNOWN_HEADERS.
        :return: A new block with the compressed content and both lengths
            populated; nothing is decompressed yet.
        :raises ValueError: if the header is not a known one.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content
        :param start: Offset of the record's first byte in the content.
        :param end: Offset just past the record's last byte.
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: if the record's control code is neither 'f' nor
            'd', or its length field disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c not in ('f', 'd'):
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                             ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            return self._content[content_start:end]
        return apply_delta_to_source(self._content, content_start, end)

    def set_chunked_content(self, content_chunks, length):
        """Set the content of this block to the given chunks.

        :param content_chunks: A list of strings making up the content.
        :param length: The value to record as the uncompressed length.
        """
        # If we have lots of short lines, it is may be more efficient to join
        # the content ahead of time. If the content is <10MiB, we don't really
        # care about the extra memory consumption, so we can just pack it and
        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
        # mysql, which is below the noise margin
        self._content_length = length
        self._content_chunks = content_chunks
        self._content = None
        self._z_content = None

    def set_content(self, content):
        """Set the content of this block."""
        self._content_length = len(content)
        self._content = content
        self._z_content = None

    def _create_z_content_using_lzma(self):
        """Fill in _z_content and its length by lzma-compressing the content."""
        if self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            raise AssertionError('Nothing to compress')
        self._z_content = pylzma.compress(self._content)
        self._z_content_length = len(self._z_content)

    def _create_z_content_from_chunks(self):
        """Fill in _z_content by zlib-compressing the chunks one at a time."""
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
        # A real list is needed so the flush() output can be appended.
        compressed_chunks = [compressor.compress(chunk)
                             for chunk in self._content_chunks]
        compressed_chunks.append(compressor.flush())
        self._z_content = ''.join(compressed_chunks)
        self._z_content_length = len(self._z_content)

    def _create_z_content(self):
        """Make sure _z_content exists, compressing the content if needed."""
        if self._z_content is not None:
            return
        if _USE_LZMA:
            self._create_z_content_using_lzma()
            return
        if self._content_chunks is not None:
            self._create_z_content_from_chunks()
            return
        self._z_content = zlib.compress(self._content)
        self._z_content_length = len(self._z_content)

    def to_bytes(self):
        """Encode the information into a byte stream.

        :return: header + '<compressed len>\\n<uncompressed len>\\n' +
            compressed content, as a single string.
        """
        self._create_z_content()
        if _USE_LZMA:
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

    def _dump_delta(self, delta_content, content_len, include_text):
        """Decode the instructions of one delta record for _dump().

        :param delta_content: The bytes of the delta record body.
        :param content_len: The length the record header gave for that body.
        :param include_text: Whether to attach the copied/inserted text.
        :return: (decomp_len, delta_info) where delta_info is a list of
            ('c', offset, length[, text]) and ('i', num_bytes, text) tuples.
        :raises ValueError: if the instructions do not consume exactly
            content_len bytes or do not add up to decomp_len.
        """
        delta_info = []
        # The first entry in a delta is the decompressed length
        decomp_len, delta_pos = decode_base128_int(delta_content)
        measured_len = 0
        while delta_pos < content_len:
            c = ord(delta_content[delta_pos])
            delta_pos += 1
            if c & 0x80: # Copy
                (offset, length,
                 delta_pos) = decode_copy_instruction(delta_content, c,
                                                      delta_pos)
                if include_text:
                    text = self._content[offset:offset+length]
                    delta_info.append(('c', offset, length, text))
                else:
                    delta_info.append(('c', offset, length))
                measured_len += length
            else: # Insert
                if include_text:
                    txt = delta_content[delta_pos:delta_pos+c]
                else:
                    txt = ''
                delta_info.append(('i', c, txt))
                measured_len += c
                delta_pos += c
        if delta_pos != content_len:
            raise ValueError('Delta consumed a bad number of bytes:'
                             ' %d != %d' % (delta_pos, content_len))
        if measured_len != decomp_len:
            raise ValueError('Delta claimed fulltext was %d bytes, but'
                             ' extraction resulted in %d bytes'
                             % (decomp_len, measured_len))
        return decomp_len, delta_info

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, chose whether you
            want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: if a record has an unknown kind character or
            lengths that are inconsistent with the content.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                if include_text:
                    text = self._content[pos:pos+content_len]
                    result.append(('f', content_len, text))
                else:
                    result.append(('f', content_len))
            else: # Delta
                delta_content = self._content[pos:pos+content_len]
                decomp_len, delta_info = self._dump_delta(
                    delta_content, content_len, include_text)
                result.append(('d', content_len, decomp_len, delta_info))
            pos += content_len
        return result

403

404

405

class _LazyGroupCompressFactory(object):

406

"""Yield content from a GroupCompressBlock on demand."""

407

408

def __init__(self, key, parents, manager, start, end, first):

409

"""Create a _LazyGroupCompressFactory

410

411

:param key: The key of just this record

412

:param parents: The parents of this key (possibly None)

413

:param gc_block: A GroupCompressBlock object

414

:param start: Offset of the first byte for this record in the

415

uncompressd content

416

:param end: Offset of the byte just after the end of this record

417

(ie, bytes = content[start:end])

418

:param first: Is this the first Factory for the given block?

419

"""

420

self.key = key

421

self.parents = parents

422

self.sha1 = None

423

# Note: This attribute coupled with Manager._factories creates a

424

# reference cycle. Perhaps we would rather use a weakref(), or

425

# find an appropriate time to release the ref. After the first

426

# get_bytes_as call? After Manager.get_record_stream() returns

427

# the object?

428

self._manager = manager

429

self._bytes = None

430

self.storage_kind = 'groupcompress-block'

431

if not first:

432

self.storage_kind = 'groupcompress-block-ref'

433

self._first = first

434

self._start = start

435

self._end = end

436

437

def __repr__(self):

438

return '%s(%s, first=%s)' % (self.__class__.__name__,

439

self.key, self._first)

440

441

def get_bytes_as(self, storage_kind):

442

if storage_kind == self.storage_kind:

443

if self._first:

444

# wire bytes, something...

445

return self._manager._wire_bytes()

446

else:

447

return ''

448

if storage_kind in ('fulltext', 'chunked'):

449

if self._bytes is None:

450

# Grab and cache the raw bytes for this entry

451

# and break the ref-cycle with _manager since we don't need it

452

# anymore

453

self._manager._prepare_for_extract()

454

block = self._manager._block

455

self._bytes = block.extract(self.key, self._start, self._end)

456

# There are code paths that first extract as fulltext, and then

457

# extract as storage_kind (smart fetch). So we don't break the

458

# refcycle here, but instead in manager.get_record_stream()

459

# self._manager = None

460

if storage_kind == 'fulltext':

461

return self._bytes

462

else:

463

return [self._bytes]

464

raise errors.UnavailableRepresentation(self.key, storage_kind,

465

self.storage_kind)

466

467

468

class _LazyGroupContentManager(object):

469

"""This manages a group of _LazyGroupCompressFactory objects."""

470

471

def __init__(self, block):

472

self._block = block

473

# We need to preserve the ordering

474

self._factories = []

475

self._last_byte = 0

476

477

def add_factory(self, key, parents, start, end):

478

if not self._factories:

479

first = True

480

else:

481

first = False

482

# Note that this creates a reference cycle....

483

factory = _LazyGroupCompressFactory(key, parents, self,

484

start, end, first=first)

485

# max() works here, but as a function call, doing a compare seems to be

486

# significantly faster, timeit says 250ms for max() and 100ms for the

487

# comparison

488

if end > self._last_byte:

489

self._last_byte = end

490

self._factories.append(factory)

491

492

def get_record_stream(self):

493

"""Get a record for all keys added so far."""

494

for factory in self._factories:

495

yield factory

496

# Break the ref-cycle

497

factory._bytes = None

498

factory._manager = None

499

# TODO: Consider setting self._factories = None after the above loop,

500

# as it will break the reference cycle

501

502

def _trim_block(self, last_byte):

503

"""Create a new GroupCompressBlock, with just some of the content."""

504

# None of the factories need to be adjusted, because the content is

505

# located in an identical place. Just that some of the unreferenced

506

# trailing bytes are stripped

507

trace.mutter('stripping trailing bytes from groupcompress block'

508

' %d => %d', self._block._content_length, last_byte)

509

new_block = GroupCompressBlock()

510

self._block._ensure_content(last_byte)

511

new_block.set_content(self._block._content[:last_byte])

512

self._block = new_block

513

514

def _rebuild_block(self):

515

"""Create a new GroupCompressBlock with only the referenced texts."""

516

compressor = GroupCompressor()

517

tstart = time.time()

518

old_length = self._block._content_length

519

end_point = 0

520

for factory in self._factories:

521

bytes = factory.get_bytes_as('fulltext')

522

(found_sha1, start_point, end_point,

523

type) = compressor.compress(factory.key, bytes, factory.sha1)

524

# Now update this factory with the new offsets, etc

525

factory.sha1 = found_sha1

526

factory._start = start_point

527

factory._end = end_point

528

self._last_byte = end_point

529

new_block = compressor.flush()

530

# TODO: Should we check that new_block really *is* smaller than the old

531

# block? It seems hard to come up with a method that it would

532

# expand, since we do full compression again. Perhaps based on a

533

# request that ends up poorly ordered?

534

delta = time.time() - tstart

535

self._block = new_block

536

trace.mutter('creating new compressed block on-the-fly in %.3fs'

537

' %d bytes => %d bytes', delta, old_length,

538

self._block._content_length)

539

540

def _prepare_for_extract(self):

541

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

542

# We expect that if one child is going to fulltext, all will be. This

543

# helps prevent all of them from extracting a small amount at a time.

544

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

545

# time (self._block._content) is a little expensive.

546

self._block._ensure_content(self._last_byte)

547

548

def _check_rebuild_block(self):

549

"""Check to see if our block should be repacked."""

550

total_bytes_used = 0

551

last_byte_used = 0

552

for factory in self._factories:

553

total_bytes_used += factory._end - factory._start

554

last_byte_used = max(last_byte_used, factory._end)

555

# If we are using most of the bytes from the block, we have nothing

556

# else to check (currently more that 1/2)

557

if total_bytes_used * 2 >= self._block._content_length:

558

return

559

# Can we just strip off the trailing bytes? If we are going to be

560

# transmitting more than 50% of the front of the content, go ahead

561

if total_bytes_used * 2 > last_byte_used:

562

self._trim_block(last_byte_used)

563

return

564

565

# We are using a small amount of the data, and it isn't just packed

566

# nicely at the front, so rebuild the content.

567

# Note: This would be *nicer* as a strip-data-from-group, rather than

568

# building it up again from scratch

569

# It might be reasonable to consider the fulltext sizes for

570

# different bits when deciding this, too. As you may have a small

571

# fulltext, and a trivial delta, and you are just trading around

572

# for another fulltext. If we do a simple 'prune' you may end up

573

# expanding many deltas into fulltexts, as well.

574

# If we build a cheap enough 'strip', then we could try a strip,

575

# if that expands the content, we then rebuild.

576

self._rebuild_block()

577

578

def _wire_bytes(self):

579

"""Return a byte stream suitable for transmitting over the wire."""

580

self._check_rebuild_block()

581

# The outer block starts with:

582

# 'groupcompress-block\n'

583

# <length of compressed key info>\n

584

# <length of uncompressed info>\n

585

# <length of gc block>\n

586

# <header bytes>

587

# <gc-block>

588

lines = ['groupcompress-block\n']

589

# The minimal info we need is the key, the start offset, and the

590

# parents. The length and type are encoded in the record itself.

591

# However, passing in the other bits makes it easier. The list of

592

# keys, and the start offset, the length

593

# 1 line key

594

# 1 line with parents, '' for ()

595

# 1 line for start offset

596

# 1 line for end byte

597

header_lines = []

598

for factory in self._factories:

599

key_bytes = '\x00'.join(factory.key)

600

parents = factory.parents

601

if parents is None:

602

parent_bytes = 'None:'

603

else:

604

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

605

record_header = '%s\n%s\n%d\n%d\n' % (

606

key_bytes, parent_bytes, factory._start, factory._end)

607

header_lines.append(record_header)

608

# TODO: Can we break the refcycle at this point and set

609

# factory._manager = None?

610

header_bytes = ''.join(header_lines)

611

del header_lines

612

header_bytes_len = len(header_bytes)

613

z_header_bytes = zlib.compress(header_bytes)

614

del header_bytes

615

z_header_bytes_len = len(z_header_bytes)

616

block_bytes = self._block.to_bytes()

617

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

618

len(block_bytes)))

619

lines.append(z_header_bytes)

620

lines.append(block_bytes)

621

del z_header_bytes, block_bytes

622

return ''.join(lines)

623

624

@classmethod

625

def from_bytes(cls, bytes):

626

# TODO: This does extra string copying, probably better to do it a

627

# different way

628

(storage_kind, z_header_len, header_len,

629

block_len, rest) = bytes.split('\n', 4)

630

del bytes

631

if storage_kind != 'groupcompress-block':

632

raise ValueError('Unknown storage kind: %s' % (storage_kind,))

633

z_header_len = int(z_header_len)

634

if len(rest) < z_header_len:

635

raise ValueError('Compressed header len shorter than all bytes')

636

z_header = rest[:z_header_len]

637

header_len = int(header_len)

638

header = zlib.decompress(z_header)

639

if len(header) != header_len:

640

raise ValueError('invalid length for decompressed bytes')

641

del z_header

642

block_len = int(block_len)

643

if len(rest) != z_header_len + block_len:

644

raise ValueError('Invalid length for block')

645

block_bytes = rest[z_header_len:]

646

del rest

647

# So now we have a valid GCB, we just need to parse the factories that

648

# were sent to us

649

header_lines = header.split('\n')

650

del header

651

last = header_lines.pop()

652

if last != '':

653

raise ValueError('header lines did not end with a trailing'

654

' newline')

655

if len(header_lines) % 4 != 0:

656

raise ValueError('The header was not an even multiple of 4 lines')

657

block = GroupCompressBlock.from_bytes(block_bytes)

658

del block_bytes

659

result = cls(block)

660

for start in xrange(0, len(header_lines), 4):

661

# intern()?

662

key = tuple(header_lines[start].split('\x00'))

663

parents_line = header_lines[start+1]

664

if parents_line == 'None:':

665

parents = None

666

else:

667

parents = tuple([tuple(segment.split('\x00'))

668

for segment in parents_line.split('\t')

669

if segment])

670

start_offset = int(header_lines[start+2])

671

end_offset = int(header_lines[start+3])

672

result.add_factory(key, parents, start_offset, end_offset)

673

return result

674

675

676

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a stream of records.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised form, as parsed by
        _LazyGroupContentManager.from_bytes.
    :param line_end: Not used by this function.
    :return: The record stream of the manager built from bytes.
    :raises ValueError: if storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

681

682

683

class _CommonGroupCompressor(object):

684

685

def __init__(self):

686

"""Create a GroupCompressor."""

687

self.chunks = []

688

self._last = None

689

self.endpoint = 0

690

self.input_bytes = 0

691

self.labels_deltas = {}

692

self._delta_index = None # Set by the children

693

self._block = GroupCompressBlock()

694

695

def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):

696

"""Compress lines with label key.

697

698

:param key: A key tuple. It is stored in the output

699

for identification of the text during decompression. If the last

700

element is 'None' it is replaced with the sha1 of the text -

701

e.g. sha1:xxxxxxx.

702

:param bytes: The bytes to be compressed

703

:param expected_sha: If non-None, the sha the lines are believed to

704

have. During compression the sha is calculated; a mismatch will

705

cause an error.

706

:param nostore_sha: If the computed sha1 sum matches, we will raise

707

ExistingContent rather than adding the text.

708

:param soft: Do a 'soft' compression. This means that we require larger

709

ranges to match to be considered for a copy command.

710

711

:return: The sha1 of lines, the start and end offsets in the delta, and

712

the type ('fulltext' or 'delta').

713

714

:seealso VersionedFiles.add_lines:

715

"""

716

if not bytes: # empty, like a dir entry, etc

717

if nostore_sha == _null_sha1:

718

raise errors.ExistingContent()

719

return _null_sha1, 0, 0, 'fulltext'

720

# we assume someone knew what they were doing when they passed it in

721

if expected_sha is not None:

722

sha1 = expected_sha

723

else:

724

sha1 = osutils.sha_string(bytes)

725

if nostore_sha is not None:

726

if sha1 == nostore_sha:

727

raise errors.ExistingContent()

728

if key[-1] is None:

729

key = key[:-1] + ('sha1:' + sha1,)

730

731

start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)

732

return sha1, start, end, type

733

734

def _compress(self, key, bytes, max_delta_size, soft=False):

735

"""Compress lines with label key.

736

737

:param key: A key tuple. It is stored in the output for identification

738

of the text during decompression.

739

740

:param bytes: The bytes to be compressed

741

742

:param max_delta_size: The size above which we issue a fulltext instead

743

of a delta.

744

745

:param soft: Do a 'soft' compression. This means that we require larger

746

ranges to match to be considered for a copy command.

747

748

:return: The sha1 of lines, the start and end offsets in the delta, and

749

the type ('fulltext' or 'delta').

750

"""

751

raise NotImplementedError(self._compress)

752

753

def extract(self, key):

754

"""Extract a key previously added to the compressor.

755

756

:param key: The key to extract.

757

:return: An iterable over bytes and the sha1.

758

"""

759

(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]

760

delta_chunks = self.chunks[start_chunk:end_chunk]

761

stored_bytes = ''.join(delta_chunks)

762

if stored_bytes[0] == 'f':

763

fulltext_len, offset = decode_base128_int(stored_bytes[1:10])

764

data_len = fulltext_len + 1 + offset

765

if data_len != len(stored_bytes):

766

raise ValueError('Index claimed fulltext len, but stored bytes'

767

' claim %s != %s'

768

% (len(stored_bytes), data_len))

769

bytes = stored_bytes[offset + 1:]

770

else:

771

# XXX: This is inefficient at best

772

source = ''.join(self.chunks[:start_chunk])

773

if stored_bytes[0] != 'd':

774

raise ValueError('Unknown content kind, bytes claim %s'

775

% (stored_bytes[0],))

776

delta_len, offset = decode_base128_int(stored_bytes[1:10])

777

data_len = delta_len + 1 + offset

778

if data_len != len(stored_bytes):

779

raise ValueError('Index claimed delta len, but stored bytes'

780

' claim %s != %s'

781

% (len(stored_bytes), data_len))

782

bytes = apply_delta(source, stored_bytes[offset + 1:])

783

bytes_sha1 = osutils.sha_string(bytes)

784

return bytes, bytes_sha1

785

786

def flush(self):

787

"""Finish this group, creating a formatted stream.

788

789

After calling this, the compressor should no longer be used

790

"""

791

# TODO: this causes us to 'bloat' to 2x the size of content in the

792

# group. This has an impact for 'commit' of large objects.

793

# One possibility is to use self._content_chunks, and be lazy and

794

# only fill out self._content as a full string when we actually

795

# need it. That would at least drop the peak memory consumption

796

# for 'commit' down to ~1x the size of the largest file, at a

797

# cost of increased complexity within this code. 2x is still <<

798

# 3x the size of the largest file, so we are doing ok.

799

self._block.set_chunked_content(self.chunks, self.endpoint)

800

self.chunks = None

801

self._delta_index = None

802

return self._block

803

804

def pop_last(self):

805

"""Call this if you want to 'revoke' the last compression.

806

807

After this, the data structures will be rolled back, but you cannot do

808

more compression.

809

"""

810

self._delta_index = None

811

del self.chunks[self._last[0]:]

812

self.endpoint = self._last[1]

813

self._last = None

814

815

def ratio(self):

816

"""Return the overall compression ratio."""

817

return float(self.input_bytes) / float(self.endpoint)

818

819

820

class PythonGroupCompressor(_CommonGroupCompressor):

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        index = LinesDeltaIndex([])
        self._delta_index = index
        # The actual content is managed by LinesDeltaIndex, so share its list
        self.chunks = index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        text_len = len(bytes)
        text_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            text_lines, bytes_length=text_len, soft=soft)
        delta_len = sum([len(line) for line in out_lines])
        if delta_len <= max_delta_size:
            # this is a worthy delta, output it
            kind = 'delta'
            out_lines[0] = 'd'
            # Update the delta_length to include those two encoded integers
            out_lines[1] = encode_base128_int(delta_len)
        else:
            # The delta is larger than the caller's limit, insert a fulltext
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(text_len)]
            out_lines.extend(text_lines)
            # The two header entries are not indexed; every text line is.
            index_lines = [False, False] + [True] * len(text_lines)
        # Remember the state before insertion, so pop_last() can roll back
        start = self.endpoint
        first_chunk = len(self.chunks)
        self._last = (first_chunk, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += text_len
        self.labels_deltas[key] = (start, first_chunk,
                                   self.endpoint, len(self.chunks))
        return start, self.endpoint, kind

863

864

865

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting
       minimal edits only.
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        """Create a compressor backed by a fresh DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        :param soft: Accepted for interface compatibility; it is not used
            by this implementation.
        :return: (start, end, type) where start and end are the endpoint
            before and after the record was output and type is 'fulltext'
            or 'delta'.
        :raises AssertionError: if the delta index's _source_offset does
            not match self.endpoint before or after the insertion.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No delta was produced: store the full text.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # One byte for the 'f' marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            # One byte for the 'd' marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # The literals are concatenated, so the separating space has to
            # be part of one of them.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, type

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        The position before the output is recorded in self._last, the
        chunks are appended to self.chunks and self.endpoint is advanced by
        their total length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

939

940

941

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Passed through to the _GCGraphIndex that
        the factory builds.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles with extra ``stream`` and ``writer``
        attributes.
    """
    def factory(transport):
        pack_name = 'newpack'
        # One reference list when a graph is stored, none otherwise.
        if graph:
            reference_lists = 1
        else:
            reference_lists = 0
        builder = BTreeBuilder(reference_lists=reference_lists,
                               key_elements=keylength)
        stream = transport.open_write_stream(pack_name)
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        gc_index = _GCGraphIndex(builder, lambda:True, parents=graph,
                                 add_callback=builder.add_nodes,
                                 inconsistency_fatal=inconsistency_fatal)
        access = knit._DirectPackAccess({})
        access.set_writer(writer, builder, (transport, pack_name))
        versioned_files = GroupCompressVersionedFiles(gc_index, access, delta)
        # Keep the stream and writer reachable from the result.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

971

972

973

def cleanup_pack_group(versioned_files):
    """End the pack container and close the stream behind versioned_files.

    :param versioned_files: An object carrying ``writer`` and ``stream``
        attributes, such as the object returned by a make_pack_factory()
        factory.
    """
    try:
        versioned_files.writer.end()
    finally:
        # Close the stream even when ending the container raises, so the
        # write stream is not leaked; the exception still propagates.
        versioned_files.stream.close()

976

977

978

class GroupCompressVersionedFiles(VersionedFiles):

979

"""A group-compress based VersionedFiles implementation."""

980

981

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    # Collaborators handed in by the caller.
    self._index = index
    self._access = access
    self._delta = delta
    # State owned by this object: parents of not-yet-flushed keys, a size
    # bounded cache of groups (50MB) and the list of fallback sources.
    self._unadded_refs = {}
    group_cache_bytes = 50 * 1024 * 1024
    self._group_cache = LRUSizeCache(max_size=group_cache_bytes)
    self._fallback_vfs = []

994

995

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add.  None is
        treated as no parents.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \n and contain no
        other \n's. The last line may either contain no \n's or a single
        terminating \n. If the lines list does not meet this constraint
        the add routine may error or may succeed - but you will be unable
        to read the data back accurately. (Checking the lines have been
        split correctly is expensive and extremely unlikely to catch bugs
        so it is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations.  VERY IMPORTANT: the texts must be those
        returned by add_lines or data corruption can be caused.  Not used
        by this implementation.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent.  The format is
        the SequenceMatcher.get_matching_blocks format.  Not used by this
        implementation.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and None in
        place of an opaque representation of the inserted version.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_length = sum(len(line) for line in lines)
    record = ChunkedContentFactory(key, parents, None, lines)
    inserted = self._insert_record_stream([record], random_id=random_id,
                                          nostore_sha=nostore_sha)
    sha1s = list(inserted)
    return sha1s[0], text_length, None

1044

1045

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :return: The text sha1, the length of text, and None.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    # Only plain str is accepted; anything else is reported as unicode.
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_length = len(text)
    record = FulltextContentFactory(key, parents, None, text)
    inserted = self._insert_record_stream([record], random_id=random_id,
                                          nostore_sha=nostore_sha)
    sha1s = list(inserted)
    return sha1s[0], text_length, None

1062

1063

def add_fallback_versioned_files(self, a_versioned_files):
    """Register another source of texts for keys not present here.

    :param a_versioned_files: A VersionedFiles object; it is appended to
        the list of fallbacks.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1069

1070

def annotate(self, key):
    """See VersionedFiles.annotate."""
    # A fresh Annotator is built over this object for every call.
    return annotate.Annotator(self).annotate_flat(key)

1074

1075

def get_annotator(self):
    """Return a new annotate.Annotator working on this object."""
    annotator = annotate.Annotator(self)
    return annotator

1077

1078

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    :param progress_bar: Not used by this implementation.
    :param keys: When given, the unordered record stream for these keys is
        returned without being consumed.  When None, the fulltext of every
        key is extracted and nothing is returned.
    """
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)
    all_keys = self.keys()
    for record in self.get_record_stream(all_keys, 'unordered', True):
        record.get_bytes_as('fulltext')

1086

1087

def _check_add(self, key, lines, random_id, check_content):

1088

"""check that version_id and lines are safe to add."""

1089

version_id = key[-1]

1090

if version_id is not None:

1091

if osutils.contains_whitespace(version_id):

1092

raise errors.InvalidRevisionId(version_id, self)

1093

self.check_not_reserved_id(version_id)

1094

# TODO: If random_id==False and the key is already present, we should

1095

# probably check that the existing content is identical to what is

1096

# being inserted, and otherwise raise an exception. This would make

1097

# the bundle code simpler.

1098

if check_content:

1099

self._check_lines_not_unicode(lines)

1100

self._check_lines_are_lines(lines)

1101

1102

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined mapping is wanted, not the per-source breakdown.
    lookup = self._get_parent_map_with_sources(keys)
    return lookup[0]

1110

1111

def _get_parent_map_with_sources(self, keys):

1112

"""Get a map of the parents of keys.

1113

1114

:param keys: The keys to look up parents for.

1115

:return: A tuple. The first element is a mapping from keys to parents.

1116

Absent keys are absent from the mapping. The second element is a

1117

list with the locations each key was found in. The first element

1118

is the in-this-knit parents, the second the first fallback source,

1119

and so on.

1120

"""

1121

result = {}

1122

sources = [self._index] + self._fallback_vfs

1123

source_results = []

1124

missing = set(keys)

1125

for source in sources:

1126

if not missing:

1127

break

1128

new_result = source.get_parent_map(missing)

1129

source_results.append(new_result)

1130

result.update(new_result)

1131

missing.difference_update(set(new_result))

1132

return result, source_results

1133

1134

def _get_block(self, index_memo):

1135

read_memo = index_memo[0:3]

1136

# get the group:

1137

try:

1138

block = self._group_cache[read_memo]

1139

except KeyError:

1140

# read the group

1141

zdata = self._access.get_raw_records([read_memo]).next()

1142

# decompress - whole thing - this is not a bug, as it

1143

# permits caching. We might want to store the partially

1144

# decompresed group and decompress object, so that recent

1145

# texts are not penalised by big groups.

1146

block = GroupCompressBlock.from_bytes(zdata)

1147

self._group_cache[read_memo] = block

1148

# cheapo debugging:

1149

# print len(zdata), len(plain)

1150

# parse - requires split_lines, better to have byte offsets

1151

# here (but not by much - we only split the region for the

1152

# recipe, and we often want to end up with lines anyway.

1153

return block

1154

1155

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    nothing_missing = frozenset()
    return nothing_missing

1164

1165

def get_record_stream(self, keys, ordering, include_delta_closure):

1166

"""Get a stream of records for keys.

1167

1168

:param keys: The keys to include.

1169

:param ordering: Either 'unordered' or 'topological'. A topologically

1170

sorted stream has compression parents strictly before their

1171

children.

1172

:param include_delta_closure: If True then the closure across any

1173

compression parents will be included (in the opaque data).

1174

:return: An iterator of ContentFactory objects, each of which is only

1175

valid until the iterator is advanced.

1176

"""

1177

# keys might be a generator

1178

orig_keys = list(keys)

1179

keys = set(keys)

1180

if not keys:

1181

return

1182

if (not self._index.has_graph

1183

and ordering in ('topological', 'groupcompress')):

1184

# Cannot topological order when no graph has been stored.

1185

# but we allow 'as-requested' or 'unordered'

1186

ordering = 'unordered'

1187

1188

remaining_keys = keys

1189

while True:

1190

try:

1191

keys = set(remaining_keys)

1192

for content_factory in self._get_remaining_record_stream(keys,

1193

orig_keys, ordering, include_delta_closure):

1194

remaining_keys.discard(content_factory.key)

1195

yield content_factory

1196

return

1197

except errors.RetryWithNewPacks, e:

1198

self._access.reload_or_raise(e)

1199

1200

def _find_from_fallback(self, missing):

1201

"""Find whatever keys you can from the fallbacks.

1202

1203

:param missing: A set of missing keys. This set will be mutated as keys

1204

are found from a fallback_vfs

1205

:return: (parent_map, key_to_source_map, source_results)

1206

parent_map the overall key => parent_keys

1207

key_to_source_map a dict from {key: source}

1208

source_results a list of (source: keys)

1209

"""

1210

parent_map = {}

1211

key_to_source_map = {}

1212

source_results = []

1213

for source in self._fallback_vfs:

1214

if not missing:

1215

break

1216

source_parents = source.get_parent_map(missing)

1217

parent_map.update(source_parents)

1218

source_parents = list(source_parents)

1219

source_results.append((source, source_parents))

1220

key_to_source_map.update((key, source) for key in source_parents)

1221

missing.difference_update(source_parents)

1222

return parent_map, key_to_source_map, source_results

1223

1224

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The returned objects should be in the order defined by 'ordering',
    which can weave between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: key => parent keys for every key to order.
    :param key_to_source_map: key => source for keys held by another
        source; keys not in it are attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering != 'topological':
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        ordered_keys = sort_gc_optimal(parent_map)
    else:
        ordered_keys = topo_sort(parent_map)
    # Now group by source: consecutive keys with the same source share one
    # entry, and a new entry starts whenever the source changes.
    grouped = []
    previous_source = None
    for key in ordered_keys:
        owner = key_to_source_map.get(key, self)
        if owner is not previous_source:
            grouped.append((owner, []))
            previous_source = owner
        grouped[-1][1].append(key)
    return grouped

1252

1253

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1254

key_to_source_map):

1255

source_keys = []

1256

current_source = None

1257

for key in orig_keys:

1258

if key in locations or key in unadded_keys:

1259

source = self

1260

elif key in key_to_source_map:

1261

source = key_to_source_map[key]

1262

else: # absent

1263

continue

1264

if source is not current_source:

1265

source_keys.append((source, []))

1266

current_source = source

1267

source_keys[-1][1].append(key)

1268

return source_keys

1269

1270

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1271

source_result):

1272

def get_group(key):

1273

# This is the group the bytes are stored in, followed by the

1274

# location in the group

1275

return locations[key][0]

1276

present_keys = sorted(locations.iterkeys(), key=get_group)

1277

# We don't have an ordering for keys in the in-memory object, but

1278

# lets process the in-memory ones first.

1279

present_keys = list(unadded_keys) + present_keys

1280

# Now grab all of the ones from other sources

1281

source_keys = [(self, present_keys)]

1282

source_keys.extend(source_result)

1283

return source_keys

1284

1285

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    Keys found nowhere are yielded first as AbsentContentFactory objects.
    The remaining keys are then streamed source by source: keys held by
    self are batched per group block through a _LazyGroupContentManager,
    keys still in the in-memory compressor are yielded as fulltexts, and
    keys held by a fallback are delegated to that fallback's
    get_record_stream().

    :param keys: The keys to include.
    :param orig_keys: The keys in the order they were requested; only used
        for the 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    # Whatever is neither indexed nor pending in memory has to come from a
    # fallback; _find_from_fallback removes the keys it finds from missing.
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = dict((key, details[2]) for key, details in
                          locations.iteritems())
        for key in unadded_keys:
            parent_map[key] = self._unadded_refs[key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    elif ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    # Keys that no source knows about.
    for key in missing:
        yield AbsentContentFactory(key)
    # manager collects the keys that live in the block identified by
    # last_read_memo; it is drained whenever the block or source changes.
    manager = None
    last_read_memo = None
    # TODO: This works fairly well at batching up existing groups into a
    #       streamable format, and possibly allowing for taking one big
    #       group and splitting it when it isn't fully utilized.
    #       However, it doesn't allow us to find under-utilized groups and
    #       combine them into a bigger group on the fly.
    #       (Consider the issue with how chk_map inserts texts
    #       one-at-a-time.) This could be done at insert_record_stream()
    #       time, but it probably would decrease the number of
    #       bytes-on-the-wire for fetch.
    for source, keys in source_keys:
        if source is self:
            for key in keys:
                if key in self._unadded_refs:
                    # Pending in the in-memory compressor: drain any
                    # batched records first to keep the stream order.
                    if manager is not None:
                        for factory in manager.get_record_stream():
                            yield factory
                        last_read_memo = manager = None
                    bytes, sha1 = self._compressor.extract(key)
                    parents = self._unadded_refs[key]
                    yield FulltextContentFactory(key, parents, sha1, bytes)
                else:
                    index_memo, _, parents, (method, _) = locations[key]
                    read_memo = index_memo[0:3]
                    if last_read_memo != read_memo:
                        # We are starting a new block. If we have a
                        # manager, we have found everything that fits for
                        # now, so yield records
                        if manager is not None:
                            for factory in manager.get_record_stream():
                                yield factory
                        # Now start a new manager
                        block = self._get_block(index_memo)
                        manager = _LazyGroupContentManager(block)
                        last_read_memo = read_memo
                    # Items 3 and 4 of the memo are passed on as the start
                    # and end of this key within the block.
                    start, end = index_memo[3:5]
                    manager.add_factory(key, parents, start, end)
        else:
            # Keys held by a fallback: drain the batched records, then
            # hand over to the fallback's own stream.
            if manager is not None:
                for factory in manager.get_record_stream():
                    yield factory
                last_read_memo = manager = None
            for record in source.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                yield record
    # Drain whatever is still batched for the last block.
    if manager is not None:
        for factory in manager.get_record_stream():
            yield factory

1374

1375

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping key => sha1.  A record's own sha1 is used when
        it has one; otherwise the sha1 is computed from the fulltext.
        Absent records are left out of the result.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # Identity test: None is a singleton, and this does not depend on
        # how the sha1 object implements comparison.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1386

1387

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserter = self._insert_record_stream(stream, random_id=False)
    # The helper is a generator: it has to be run to completion for the
    # records to be inserted; the values it yields are not needed here.
    for _unused_sha1 in inserter:
        pass

1400

1401

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated, and the
    final group is only flushed once the stream is exhausted.

    :param stream: A stream of records to insert.
    :param random_id: Passed on to the index's add_records().  When True,
        a key seen twice in the stream is noted in the trace and skipped.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records.  Records
        copied as part of a reused block do not yield a sha1.
    :raises errors.RevisionNotPresent: if the stream contains an absent
        record.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    # Adapters are created lazily and cached per (storage_kind, target).
    adapters = {}
    def get_adapter(adapter_key):
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    # (key, 'start end', refs) for texts compressed but not yet written.
    keys_to_add = []
    def flush():
        # Write the current group as one raw record, index every pending
        # key against it, then start over with a fresh compressor.
        bytes = self._compressor.flush().to_bytes()
        index, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    # State of the most recently copied raw block, used by the records
    # that refer to it.
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                insert_manager._check_rebuild_block()
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                # Index this record against the block copied above; a
                # '-ref' record relies on a block inserted for an earlier
                # record of the stream.
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            # No direct fulltext: go through an adapter for this kind.
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        # Track the largest fulltext seen and the prefix it belongs to.
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         type) = self._compressor.compress(record.key,
                                           bytes, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the full group, write that group
            # and compress the text again as the first of a new group.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point,
             type) = self._compressor.compress(record.key, bytes,
                                               record.sha1)
        if record.key[-1] is None:
            # A key without a final element is completed from the sha1.
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
                            (record.parents,)))
    # Write out whatever is left in the last group.
    if len(keys_to_add):
        flush()
    self._compressor = None

1539

1540

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \n
       terminators.
     * Lines are returned in arbitrary order.

    :return: An iterator over (line, key).
    :raises errors.RevisionNotPresent: if a requested key is absent.
    """
    wanted = set(keys)
    expected = len(wanted)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    records = self.get_record_stream(wanted, 'unordered', True)
    for position, record in enumerate(records):
        # XXX: todo - optimise to use less than full texts.
        text_key = record.key
        if pb is not None:
            pb.update('Walking content', position, expected)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(text_key, self)
        fulltext = record.get_bytes_as('fulltext')
        for line in osutils.split_lines(fulltext):
            yield line, text_key
    if pb is not None:
        # Report completion once every record has been walked.
        pb.update('Walking content', expected, expected)

1579

1580

def keys(self):
    """See VersionedFiles.keys.

    :return: The set of keys of the index and of every fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    every_key = set()
    for source in [self._index] + self._fallback_vfs:
        every_key.update(source.keys())
    return every_key

1589

1590

1591

class _GCGraphIndex(object):

1592

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1593

1594

def __init__(self, graph_index, is_locked, parents=True,

1595

add_callback=None, track_external_parent_refs=False,

1596

inconsistency_fatal=True):

1597

"""Construct a _GCGraphIndex on a graph_index.

1598

1599

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1600

:param is_locked: A callback, returns True if the index is locked and

1601

thus usable.

1602

:param parents: If True, record knits parents, if not do not record

1603

parents.

1604

:param add_callback: If not None, allow additions to the index and call

1605

this callback with a list of added GraphIndex nodes:

1606

[(node, value, node_refs), ...]

1607

:param track_external_parent_refs: As keys are added, keep track of the

1608

keys they reference, so that we can query get_missing_parents(),

1609

etc.

1610

:param inconsistency_fatal: When asked to add records that are already

1611

present, and the details are inconsistent with the existing

1612

record, raise an exception instead of warning (and skipping the

1613

record).

1614

"""

1615

self._add_callback = add_callback

1616

self._graph_index = graph_index

1617

self._parents = parents

1618

self.has_graph = parents

1619

self._is_locked = is_locked

1620

self._inconsistency_fatal = inconsistency_fatal

1621

if track_external_parent_refs:

1622

self._key_dependencies = knit._KeyRefs()

1623

else:

1624

self._key_dependencies = None

1625

1626

def add_records(self, records, random_id=False):

1627

"""Add multiple records to the index.

1628

1629

This function does not insert data into the Immutable GraphIndex

1630

backing the KnitGraphIndex, instead it prepares data for insertion by

1631

the caller and checks that it is safe to insert then calls

1632

self._add_callback with the prepared GraphIndex nodes.

1633

1634

:param records: a list of tuples:

1635

(key, options, access_memo, parents).

1636

:param random_id: If True the ids being added were randomly generated

1637

and no check for existence will be performed.

1638

"""

1639

if not self._add_callback:

1640

raise errors.ReadOnlyError(self)

1641

# we hope there are no repositories with inconsistent parentage

1642

# anymore.

1643

1644

changed = False

1645

keys = {}

1646

for (key, value, refs) in records:

1647

if not self._parents:

1648

if refs:

1649

for ref in refs:

1650

if ref:

1651

raise errors.KnitCorrupt(self,

1652

"attempt to add node with parents "

1653

"in parentless index.")

1654

refs = ()

1655

changed = True

1656

keys[key] = (value, refs)

1657

# check for dups

1658

if not random_id:

1659

present_nodes = self._get_entries(keys)

1660

for (index, key, value, node_refs) in present_nodes:

1661

if node_refs != keys[key][1]:

1662

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1663

if self._inconsistency_fatal:

1664

raise errors.KnitCorrupt(self, "inconsistent details"

1665

" in add_records: %s" %

1666

details)

1667

else:

1668

trace.warning("inconsistent details in skipped"

1669

" record: %s", details)

1670

del keys[key]

1671

changed = True

1672

if changed:

1673

result = []

1674

if self._parents:

1675

for key, (value, node_refs) in keys.iteritems():

1676

result.append((key, value, node_refs))

1677

else:

1678

for key, (value, node_refs) in keys.iteritems():

1679

result.append((key, value))

1680

records = result

1681

key_dependencies = self._key_dependencies

1682

if key_dependencies is not None and self._parents:

1683

for key, value, refs in records:

1684

parents = refs[0]

1685

key_dependencies.add_references(key, parents)

1686

self._add_callback(records)

1687

1688

def _check_read(self):

1689

"""Raise an exception if reads are not permitted."""

1690

if not self._is_locked():

1691

raise errors.ObjectNotLocked(self)

1692

1693

def _check_write_ok(self):

1694

"""Raise an exception if writes are not permitted."""

1695

if not self._is_locked():

1696

raise errors.ObjectNotLocked(self)

1697

1698

def _get_entries(self, keys, check_present=False):

1699

"""Get the entries for keys.

1700

1701

Note: Callers are responsible for checking that the index is locked

1702

before calling this method.

1703

1704

:param keys: An iterable of index key tuples.

1705

"""

1706

keys = set(keys)

1707

found_keys = set()

1708

if self._parents:

1709

for node in self._graph_index.iter_entries(keys):

1710

yield node

1711

found_keys.add(node[1])

1712

else:

1713

# adapt parentless index to the rest of the code.

1714

for node in self._graph_index.iter_entries(keys):

1715

yield node[0], node[1], node[2], ()

1716

found_keys.add(node[1])

1717

if check_present:

1718

missing_keys = keys.difference(found_keys)

1719

if missing_keys:

1720

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1721

1722

def get_parent_map(self, keys):

1723

"""Get a map of the parents of keys.

1724

1725

:param keys: The keys to look up parents for.

1726

:return: A mapping from keys to parents. Absent keys are absent from

1727

the mapping.

1728

"""

1729

self._check_read()

1730

nodes = self._get_entries(keys)

1731

result = {}

1732

if self._parents:

1733

for node in nodes:

1734

result[node[1]] = node[3][0]

1735

else:

1736

for node in nodes:

1737

result[node[1]] = None

1738

return result

1739

1740

def get_missing_parents(self):

1741

"""Return the keys of missing parents."""

1742

# Copied from _KnitGraphIndex.get_missing_parents

1743

# We may have false positives, so filter those out.

1744

self._key_dependencies.add_keys(

1745

self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))

1746

return frozenset(self._key_dependencies.get_unsatisfied_refs())

1747

1748

def get_build_details(self, keys):

1749

"""Get the various build details for keys.

1750

1751

Ghosts are omitted from the result.

1752

1753

:param keys: An iterable of keys.

1754

:return: A dict of key:

1755

(index_memo, compression_parent, parents, record_details).

1756

index_memo

1757

opaque structure to pass to read_records to extract the raw

1758

data

1759

compression_parent

1760

Content that this record is built upon, may be None

1761

parents

1762

Logical parents of this node

1763

record_details

1764

extra information about the content which needs to be passed to

1765

Factory.parse_record

1766

"""

1767

self._check_read()

1768

result = {}

1769

entries = self._get_entries(keys)

1770

for entry in entries:

1771

key = entry[1]

1772

if not self._parents:

1773

parents = None

1774

else:

1775

parents = entry[3][0]

1776

method = 'group'

1777

result[key] = (self._node_to_position(entry),

1778

None, parents, (method, None))

1779

return result

1780

1781

def keys(self):

1782

"""Get all the keys in the collection.

1783

1784

The keys are not ordered.

1785

"""

1786

self._check_read()

1787

return [node[1] for node in self._graph_index.iter_all_entries()]

1788

1789

def _node_to_position(self, node):

1790

"""Convert an index value to position details."""

1791

bits = node[2].split(' ')

1792

# It would be nice not to read the entire gzip.

1793

start = int(bits[0])

1794

stop = int(bits[1])

1795

basis_end = int(bits[2])

1796

delta_end = int(bits[3])

1797

return node[0], start, stop, basis_end, delta_end

1798

1799

def scan_unvalidated_index(self, graph_index):

1800

"""Inform this _GCGraphIndex that there is an unvalidated index.

1801

1802

This allows this _GCGraphIndex to keep track of any missing

1803

compression parents we may want to have filled in to make those

1804

indices valid.

1805

1806

:param graph_index: A GraphIndex

1807

"""

1808

if self._key_dependencies is not None:

1809

# Add parent refs from graph_index (and discard parent refs that

1810

# the graph_index has).

1811

add_refs = self._key_dependencies.add_references

1812

for node in graph_index.iter_all_entries():

1813

add_refs(node[1], node[3][0])

1814

1815

1816

1817

from bzrlib._groupcompress_py import (

1818

apply_delta,

1819

apply_delta_to_source,

1820

encode_base128_int,

1821

decode_base128_int,

1822

decode_copy_instruction,

1823

LinesDeltaIndex,

1824

)

1825

try:

1826

from bzrlib._groupcompress_pyx import (

1827

apply_delta,

1828

apply_delta_to_source,

1829

DeltaIndex,

1830

encode_base128_int,

1831

decode_base128_int,

1832

)

1833

GroupCompressor = PyrexGroupCompressor

1834

except ImportError:

1835

GroupCompressor = PythonGroupCompressor

1836

Older »