~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Canonical.com Patch Queue Manager
Date: 2009-08-27 01:34:47 UTC
mfrom: (4650.2.2 bug-393677)
Revision ID: pqm@pqm.ubuntu.com-20090827013447-ndjtt02ad7nfdoiy

(robertc) Pass IncompatibleRepositories error over the smart server.
(Robert Collins)

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/benchmarks/bench_tags.py

bzrlib/bencode.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/clean_tree.py

bzrlib/crash.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/inventory_delta.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/python-compat.h

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/packrepository.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/util/bencode.py

bzrlib/views.py

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/BUILD-NOTES

doc/Makefile

doc/_static

doc/_static/bzr icon 16.png

doc/_static/bzr.ico

doc/_static/en

doc/_static/en/quick-reference

doc/_templates

doc/_templates/index.html

doc/_templates/layout.html

doc/conf.py

doc/contents.txt

doc/developers/apport.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/colocated-branches.txt

doc/developers/content-filtering.txt

doc/developers/cycle.txt

doc/developers/ec2.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/lca_tree_merging.txt

doc/en/migration

doc/en/migration/index.txt

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/index-for-2x.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/shelving_changes.txt

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

doc/index.ru.txt

doc/make.bat

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

tools/check-newsbugs.py

tools/packaging/lp-upload-release

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files removed:
bzrlib/_walkdirs_win32.h

bzrlib/help_topics/en/hooks.txt

bzrlib/tests/test_http_implementations.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/tests/test_bencode.py

doc/developers/performance-contributing.txt

files renamed:
bzrlib/_btree_serializer_c.pyx => bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_c.h => bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_c.pyx => bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/tests/ftp_server.py => bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/inventory_implementations/ => bzrlib/tests/per_inventory/

bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

bzrlib/util/bencode.py => bzrlib/util/_bencode_py.py

doc/en/quick-reference/Makefile => doc/_static/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf => doc/_static/en/quick-reference/bzr-quick-reference.pdf

doc/en/quick-reference/quick-start-summary.png => doc/_static/en/quick-reference/bzr-quick-reference.png

doc/en/quick-reference/quick-start-summary.svg => doc/_static/en/quick-reference/bzr-quick-reference.svg

doc/developers/HACKING.txt => doc/en/developer-guide/HACKING.txt

doc/es/guia-desarrollador/ => doc/es/developer-guide/

doc/es/referencia-rapida/ => doc/es/quick-reference/

doc/es/referencia-rapida/referencia-rapida.svg => doc/es/quick-reference/quick-start-summary.svg

doc/es/notas-version/ => doc/es/release-notes/

doc/es/guia-usuario/ => doc/es/user-guide/

doc/es/referencia/ => doc/es/user-reference/

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

Makefile

NEWS

bzr.ico

bzrlib/__init__.py

bzrlib/_btree_serializer_py.py

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/chunk_writer.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/file_utils.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/treeshape.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

contrib/bzr_access

contrib/newinventory.py

contrib/pwclient.full

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/performance-roadmap.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/revision-properties.txt

doc/developers/testing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

doc/es/quick-reference/Makefile

doc/index.es.txt

doc/index.txt

profile_imports.py

setup.py

tools/bzr_epydoc_uid.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/package_mf.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/weavebench.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

trace,

)

from bzrlib.graph import Graph

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# LZMA output is currently switched off: the leading ``False`` short-circuits
# the expression, so this is False even when pylzma was imported successfully.
_USE_LZMA = False and (pylzma is not None)

# osutils.sha_string('')
_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map for insertion into groupcompress blocks.

    Keys are bucketed by prefix: the first element of a multi-element key,
    or '' for plain strings and single-element keys.  Buckets are emitted in
    sorted prefix order; inside a bucket the keys appear in the reverse of
    the order produced by topo_sort().

    :param parent_map: A dict keyed by the keys to sort; each per-prefix
        slice of it is handed to topo_sort() unchanged.
    :return: A list holding every key of parent_map in groupcompress order.
    """
    buckets = {}
    for key, value in parent_map.iteritems():
        if not isinstance(key, str) and len(key) != 1:
            prefix = key[0]
        else:
            prefix = ''
        buckets.setdefault(prefix, {})[key] = value
    ordered_keys = []
    for prefix in sorted(buckets):
        topo_order = topo_sort(buckets[prefix])
        ordered_keys.extend(reversed(topo_order))
    return ordered_keys

# The max zlib window size is 32kB, so if we set 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should guarantee

# num_bytes coming out.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    A block holds its payload in up to three forms: ``_content_chunks`` (a
    list of strings not yet joined), ``_content`` (the joined, uncompressed
    string) and ``_z_content`` (the compressed string).  The methods below
    convert between those forms on demand.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        """Create an empty block; every field starts out as None."""
        # map by key? or just order in file?
        self._compressor_name = None
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None
        self._content_chunks = None

    def __len__(self):
        """Return compressed length plus uncompressed length."""
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: if num_bytes exceeds the known content
            length, if there is nothing to decompress, if the compressor
            name is unknown, or if fewer than num_bytes could be produced.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._content_chunks is not None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers: it sets
        _z_content_length, _content_length and _z_content.  Nothing is
        returned.

        :param bytes: The serialised block.
        :param pos: Offset in bytes at which the two length lines start.
        :raises AssertionError: if the bytes after the length lines are not
            exactly _z_content_length long.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string starting with one of GCB_KNOWN_HEADERS.
        :return: A new block with the compressor name, lengths and
            compressed content filled in; nothing is decompressed yet.
        :raises ValueError: if bytes does not start with a known header.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content (not consulted here)
        :param start: Offset in the uncompressed content of the record's
            type byte.
        :param end: Offset just past the record's last byte.
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: if the record's type byte is neither 'f' nor 'd',
            or if its encoded length does not finish exactly at end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c not in ('f', 'd'):
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                             ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            return self._content[content_start:end]
        # c == 'd': a delta against the content of this same block
        return apply_delta_to_source(self._content, content_start, end)

    def set_chunked_content(self, content_chunks, length):
        """Set the content of this block to the given chunks.

        :param content_chunks: A list of strings making up the content.
        :param length: Stored as the content length; it is not checked
            against the chunks here.
        """
        # If we have lots of short lines, it is may be more efficient to join
        # the content ahead of time. If the content is <10MiB, we don't really
        # care about the extra memory consumption, so we can just pack it and
        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
        # mysql, which is below the noise margin
        self._content_length = length
        self._content_chunks = content_chunks
        self._content = None
        self._z_content = None

    def set_content(self, content):
        """Set the content of this block."""
        self._content_length = len(content)
        self._content = content
        self._z_content = None

    def _create_z_content_using_lzma(self):
        """Fill in _z_content and _z_content_length using pylzma.

        :raises AssertionError: if the block has no content to compress.
        """
        if self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            raise AssertionError('Nothing to compress')
        self._z_content = pylzma.compress(self._content)
        self._z_content_length = len(self._z_content)

    def _create_z_content_from_chunks(self):
        """Fill in _z_content by feeding each chunk to one zlib compressor."""
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
        compressed_chunks = [compressor.compress(chunk)
                             for chunk in self._content_chunks]
        compressed_chunks.append(compressor.flush())
        self._z_content = ''.join(compressed_chunks)
        self._z_content_length = len(self._z_content)

    def _create_z_content(self):
        """Make sure _z_content and _z_content_length are populated.

        Does nothing when compressed content already exists.

        :raises AssertionError: if the block has neither chunks nor content.
        """
        if self._z_content is not None:
            return
        # Fail the same way the lzma path does, rather than letting
        # zlib.compress(None) raise an unrelated TypeError.
        if self._content_chunks is None and self._content is None:
            raise AssertionError('Nothing to compress')
        if _USE_LZMA:
            self._create_z_content_using_lzma()
            return
        if self._content_chunks is not None:
            self._create_z_content_from_chunks()
            return
        self._z_content = zlib.compress(self._content)
        self._z_content_length = len(self._z_content)

    def to_bytes(self):
        """Encode the information into a byte stream.

        :return: header + '<compressed length>\\n<content length>\\n' +
            the compressed content.
        """
        self._create_z_content()
        if _USE_LZMA:
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, chose whether you
            want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: if a record has an unknown kind byte, a length
            running past the content, or a delta whose instructions do not
            add up to the lengths it declares.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                if include_text:
                    text = self._content[pos:pos+content_len]
                    result.append(('f', content_len, text))
                else:
                    result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        if include_text:
                            text = self._content[offset:offset+length]
                            delta_info.append(('c', offset, length, text))
                        else:
                            delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                                     ' extraction resulted in %d bytes'
                                     % (decomp_len, measured_len))
            pos += content_len
        return result

402

403

404

class _LazyGroupCompressFactory(object):

405

"""Yield content from a GroupCompressBlock on demand."""

406

407

def __init__(self, key, parents, manager, start, end, first):

408

"""Create a _LazyGroupCompressFactory

409

410

:param key: The key of just this record

411

:param parents: The parents of this key (possibly None)

412

:param gc_block: A GroupCompressBlock object

413

:param start: Offset of the first byte for this record in the

414

uncompressd content

415

:param end: Offset of the byte just after the end of this record

416

(ie, bytes = content[start:end])

417

:param first: Is this the first Factory for the given block?

418

"""

419

self.key = key

420

self.parents = parents

421

self.sha1 = None

422

# Note: This attribute coupled with Manager._factories creates a

423

# reference cycle. Perhaps we would rather use a weakref(), or

424

# find an appropriate time to release the ref. After the first

425

# get_bytes_as call? After Manager.get_record_stream() returns

426

# the object?

427

self._manager = manager

428

self._bytes = None

429

self.storage_kind = 'groupcompress-block'

430

if not first:

431

self.storage_kind = 'groupcompress-block-ref'

432

self._first = first

433

self._start = start

434

self._end = end

435

436

def __repr__(self):

437

return '%s(%s, first=%s)' % (self.__class__.__name__,

438

self.key, self._first)

439

440

def get_bytes_as(self, storage_kind):

441

if storage_kind == self.storage_kind:

442

if self._first:

443

# wire bytes, something...

444

return self._manager._wire_bytes()

445

else:

446

return ''

447

if storage_kind in ('fulltext', 'chunked'):

448

if self._bytes is None:

449

# Grab and cache the raw bytes for this entry

450

# and break the ref-cycle with _manager since we don't need it

451

# anymore

452

self._manager._prepare_for_extract()

453

block = self._manager._block

454

self._bytes = block.extract(self.key, self._start, self._end)

455

# There are code paths that first extract as fulltext, and then

456

# extract as storage_kind (smart fetch). So we don't break the

457

# refcycle here, but instead in manager.get_record_stream()

458

# self._manager = None

459

if storage_kind == 'fulltext':

460

return self._bytes

461

else:

462

return [self._bytes]

463

raise errors.UnavailableRepresentation(self.key, storage_kind,

464

self.storage_kind)

465

466

467

class _LazyGroupContentManager(object):

468

"""This manages a group of _LazyGroupCompressFactory objects."""

469

470

def __init__(self, block):

471

self._block = block

472

# We need to preserve the ordering

473

self._factories = []

474

self._last_byte = 0

475

476

def add_factory(self, key, parents, start, end):

477

if not self._factories:

478

first = True

479

else:

480

first = False

481

# Note that this creates a reference cycle....

482

factory = _LazyGroupCompressFactory(key, parents, self,

483

start, end, first=first)

484

# max() works here, but as a function call, doing a compare seems to be

485

# significantly faster, timeit says 250ms for max() and 100ms for the

486

# comparison

487

if end > self._last_byte:

488

self._last_byte = end

489

self._factories.append(factory)

490

491

def get_record_stream(self):

492

"""Get a record for all keys added so far."""

493

for factory in self._factories:

494

yield factory

495

# Break the ref-cycle

496

factory._bytes = None

497

factory._manager = None

498

# TODO: Consider setting self._factories = None after the above loop,

499

# as it will break the reference cycle

500

501

def _trim_block(self, last_byte):

502

"""Create a new GroupCompressBlock, with just some of the content."""

503

# None of the factories need to be adjusted, because the content is

504

# located in an identical place. Just that some of the unreferenced

505

# trailing bytes are stripped

506

trace.mutter('stripping trailing bytes from groupcompress block'

507

' %d => %d', self._block._content_length, last_byte)

508

new_block = GroupCompressBlock()

509

self._block._ensure_content(last_byte)

510

new_block.set_content(self._block._content[:last_byte])

511

self._block = new_block

512

513

def _rebuild_block(self):

514

"""Create a new GroupCompressBlock with only the referenced texts."""

515

compressor = GroupCompressor()

516

tstart = time.time()

517

old_length = self._block._content_length

518

end_point = 0

519

for factory in self._factories:

520

bytes = factory.get_bytes_as('fulltext')

521

(found_sha1, start_point, end_point,

522

type) = compressor.compress(factory.key, bytes, factory.sha1)

523

# Now update this factory with the new offsets, etc

524

factory.sha1 = found_sha1

525

factory._start = start_point

526

factory._end = end_point

527

self._last_byte = end_point

528

new_block = compressor.flush()

529

# TODO: Should we check that new_block really *is* smaller than the old

530

# block? It seems hard to come up with a method that it would

531

# expand, since we do full compression again. Perhaps based on a

532

# request that ends up poorly ordered?

533

delta = time.time() - tstart

534

self._block = new_block

535

trace.mutter('creating new compressed block on-the-fly in %.3fs'

536

' %d bytes => %d bytes', delta, old_length,

537

self._block._content_length)

538

539

def _prepare_for_extract(self):

540

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

541

# We expect that if one child is going to fulltext, all will be. This

542

# helps prevent all of them from extracting a small amount at a time.

543

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

544

# time (self._block._content) is a little expensive.

545

self._block._ensure_content(self._last_byte)

546

547

def _check_rebuild_block(self):

548

"""Check to see if our block should be repacked."""

549

total_bytes_used = 0

550

last_byte_used = 0

551

for factory in self._factories:

552

total_bytes_used += factory._end - factory._start

553

last_byte_used = max(last_byte_used, factory._end)

554

# If we are using most of the bytes from the block, we have nothing

555

# else to check (currently more that 1/2)

556

if total_bytes_used * 2 >= self._block._content_length:

557

return

558

# Can we just strip off the trailing bytes? If we are going to be

559

# transmitting more than 50% of the front of the content, go ahead

560

if total_bytes_used * 2 > last_byte_used:

561

self._trim_block(last_byte_used)

562

return

563

564

# We are using a small amount of the data, and it isn't just packed

565

# nicely at the front, so rebuild the content.

566

# Note: This would be *nicer* as a strip-data-from-group, rather than

567

# building it up again from scratch

568

# It might be reasonable to consider the fulltext sizes for

569

# different bits when deciding this, too. As you may have a small

570

# fulltext, and a trivial delta, and you are just trading around

571

# for another fulltext. If we do a simple 'prune' you may end up

572

# expanding many deltas into fulltexts, as well.

573

# If we build a cheap enough 'strip', then we could try a strip,

574

# if that expands the content, we then rebuild.

575

self._rebuild_block()

576

577

def _wire_bytes(self):

578

"""Return a byte stream suitable for transmitting over the wire."""

579

self._check_rebuild_block()

580

# The outer block starts with:

581

# 'groupcompress-block\n'

582

# <length of compressed key info>\n

583

# <length of uncompressed info>\n

584

# <length of gc block>\n

585

# <header bytes>

586

# <gc-block>

587

lines = ['groupcompress-block\n']

588

# The minimal info we need is the key, the start offset, and the

589

# parents. The length and type are encoded in the record itself.

590

# However, passing in the other bits makes it easier. The list of

591

# keys, and the start offset, the length

592

# 1 line key

593

# 1 line with parents, '' for ()

594

# 1 line for start offset

595

# 1 line for end byte

596

header_lines = []

597

for factory in self._factories:

598

key_bytes = '\x00'.join(factory.key)

599

parents = factory.parents

600

if parents is None:

601

parent_bytes = 'None:'

602

else:

603

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

604

record_header = '%s\n%s\n%d\n%d\n' % (

605

key_bytes, parent_bytes, factory._start, factory._end)

606

header_lines.append(record_header)

607

# TODO: Can we break the refcycle at this point and set

608

# factory._manager = None?

609

header_bytes = ''.join(header_lines)

610

del header_lines

611

header_bytes_len = len(header_bytes)

612

z_header_bytes = zlib.compress(header_bytes)

613

del header_bytes

614

z_header_bytes_len = len(z_header_bytes)

615

block_bytes = self._block.to_bytes()

616

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

617

len(block_bytes)))

618

lines.append(z_header_bytes)

619

lines.append(block_bytes)

620

del z_header_bytes, block_bytes

621

return ''.join(lines)

622

623

@classmethod

624

def from_bytes(cls, bytes):

625

# TODO: This does extra string copying, probably better to do it a

626

# different way

627

(storage_kind, z_header_len, header_len,

628

block_len, rest) = bytes.split('\n', 4)

629

del bytes

630

if storage_kind != 'groupcompress-block':

631

raise ValueError('Unknown storage kind: %s' % (storage_kind,))

632

z_header_len = int(z_header_len)

633

if len(rest) < z_header_len:

634

raise ValueError('Compressed header len shorter than all bytes')

635

z_header = rest[:z_header_len]

636

header_len = int(header_len)

637

header = zlib.decompress(z_header)

638

if len(header) != header_len:

639

raise ValueError('invalid length for decompressed bytes')

640

del z_header

641

block_len = int(block_len)

642

if len(rest) != z_header_len + block_len:

643

raise ValueError('Invalid length for block')

644

block_bytes = rest[z_header_len:]

645

del rest

646

# So now we have a valid GCB, we just need to parse the factories that

647

# were sent to us

648

header_lines = header.split('\n')

649

del header

650

last = header_lines.pop()

651

if last != '':

652

raise ValueError('header lines did not end with a trailing'

653

' newline')

654

if len(header_lines) % 4 != 0:

655

raise ValueError('The header was not an even multiple of 4 lines')

656

block = GroupCompressBlock.from_bytes(block_bytes)

657

del block_bytes

658

result = cls(block)

659

for start in xrange(0, len(header_lines), 4):

660

# intern()?

661

key = tuple(header_lines[start].split('\x00'))

662

parents_line = header_lines[start+1]

663

if parents_line == 'None:':

664

parents = None

665

else:

666

parents = tuple([tuple(segment.split('\x00'))

667

for segment in parents_line.split('\t')

668

if segment])

669

start_offset = int(header_lines[start+2])

670

end_offset = int(header_lines[start+3])

671

result.add_factory(key, parents, start_offset, end_offset)

672

return result

673

674

675

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn the wire form of a groupcompress block into a record stream.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised block, as parsed by
        _LazyGroupContentManager.from_bytes().
    :param line_end: Accepted but not used by this function.
    :return: The record stream of the manager parsed from bytes.
    :raises ValueError: if storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

680

681

682

class _CommonGroupCompressor(object):

683

684

def __init__(self):

685

"""Create a GroupCompressor."""

686

self.chunks = []

687

self._last = None

688

self.endpoint = 0

689

self.input_bytes = 0

690

self.labels_deltas = {}

691

self._delta_index = None # Set by the children

692

self._block = GroupCompressBlock()

693

694

def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):

695

"""Compress lines with label key.

696

697

:param key: A key tuple. It is stored in the output

698

for identification of the text during decompression. If the last

699

element is 'None' it is replaced with the sha1 of the text -

700

e.g. sha1:xxxxxxx.

701

:param bytes: The bytes to be compressed

702

:param expected_sha: If non-None, the sha the lines are believed to

703

have. During compression the sha is calculated; a mismatch will

704

cause an error.

705

:param nostore_sha: If the computed sha1 sum matches, we will raise

706

ExistingContent rather than adding the text.

707

:param soft: Do a 'soft' compression. This means that we require larger

708

ranges to match to be considered for a copy command.

709

710

:return: The sha1 of lines, the start and end offsets in the delta, and

711

the type ('fulltext' or 'delta').

712

713

:seealso VersionedFiles.add_lines:

714

"""

715

if not bytes: # empty, like a dir entry, etc

716

if nostore_sha == _null_sha1:

717

raise errors.ExistingContent()

718

return _null_sha1, 0, 0, 'fulltext'

719

# we assume someone knew what they were doing when they passed it in

720

if expected_sha is not None:

721

sha1 = expected_sha

722

else:

723

sha1 = osutils.sha_string(bytes)

724

if nostore_sha is not None:

725

if sha1 == nostore_sha:

726

raise errors.ExistingContent()

727

if key[-1] is None:

728

key = key[:-1] + ('sha1:' + sha1,)

729

730

start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)

731

return sha1, start, end, type

732

733

def _compress(self, key, bytes, max_delta_size, soft=False):

734

"""Compress lines with label key.

735

736

:param key: A key tuple. It is stored in the output for identification

737

of the text during decompression.

738

739

:param bytes: The bytes to be compressed

740

741

:param max_delta_size: The size above which we issue a fulltext instead

742

of a delta.

743

744

:param soft: Do a 'soft' compression. This means that we require larger

745

ranges to match to be considered for a copy command.

746

747

:return: The sha1 of lines, the start and end offsets in the delta, and

748

the type ('fulltext' or 'delta').

749

"""

750

raise NotImplementedError(self._compress)

751

752

def extract(self, key):

753

"""Extract a key previously added to the compressor.

754

755

:param key: The key to extract.

756

:return: An iterable over bytes and the sha1.

757

"""

758

(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]

759

delta_chunks = self.chunks[start_chunk:end_chunk]

760

stored_bytes = ''.join(delta_chunks)

761

if stored_bytes[0] == 'f':

762

fulltext_len, offset = decode_base128_int(stored_bytes[1:10])

763

data_len = fulltext_len + 1 + offset

764

if data_len != len(stored_bytes):

765

raise ValueError('Index claimed fulltext len, but stored bytes'

766

' claim %s != %s'

767

% (len(stored_bytes), data_len))

768

bytes = stored_bytes[offset + 1:]

769

else:

770

# XXX: This is inefficient at best

771

source = ''.join(self.chunks[:start_chunk])

772

if stored_bytes[0] != 'd':

773

raise ValueError('Unknown content kind, bytes claim %s'

774

% (stored_bytes[0],))

775

delta_len, offset = decode_base128_int(stored_bytes[1:10])

776

data_len = delta_len + 1 + offset

777

if data_len != len(stored_bytes):

778

raise ValueError('Index claimed delta len, but stored bytes'

779

' claim %s != %s'

780

% (len(stored_bytes), data_len))

781

bytes = apply_delta(source, stored_bytes[offset + 1:])

782

bytes_sha1 = osutils.sha_string(bytes)

783

return bytes, bytes_sha1

784

785

def flush(self):

786

"""Finish this group, creating a formatted stream.

787

788

After calling this, the compressor should no longer be used

789

"""

790

# TODO: this causes us to 'bloat' to 2x the size of content in the

791

# group. This has an impact for 'commit' of large objects.

792

# One possibility is to use self._content_chunks, and be lazy and

793

# only fill out self._content as a full string when we actually

794

# need it. That would at least drop the peak memory consumption

795

# for 'commit' down to ~1x the size of the largest file, at a

796

# cost of increased complexity within this code. 2x is still <<

797

# 3x the size of the largest file, so we are doing ok.

798

self._block.set_chunked_content(self.chunks, self.endpoint)

799

self.chunks = None

800

self._delta_index = None

801

return self._block

802

803

def pop_last(self):

804

"""Call this if you want to 'revoke' the last compression.

805

806

After this, the data structures will be rolled back, but you cannot do

807

more compression.

808

"""

809

self._delta_index = None

810

del self.chunks[self._last[0]:]

811

self.endpoint = self._last[1]

812

self._last = None

813

814

def ratio(self):

815

"""Return the overall compression ratio."""

816

return float(self.input_bytes) / float(self.endpoint)

817

818

819

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor built on the line-based LinesDeltaIndex."""

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        index = LinesDeltaIndex([])
        self._delta_index = index
        # The index owns the stored lines; chunks is the very same list.
        self.chunks = index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        input_len = len(bytes)
        new_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            new_lines, bytes_length=input_len, soft=soft)
        delta_length = sum([len(line) for line in out_lines])
        if delta_length <= max_delta_size:
            # The delta is small enough: fill in its two leading slots with
            # the kind marker and the encoded delta length.
            kind = 'delta'
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_length)
        else:
            # The delta exceeds max_delta_size, so store the whole text.
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(input_len)]
            out_lines.extend(new_lines)
            # The two header entries are flagged False, the text lines True.
            index_lines = [False, False] + [True] * len(new_lines)
        # Remember the state before insertion so pop_last() can undo it.
        start = self.endpoint
        chunk_start = len(self.chunks)
        self._last = (chunk_start, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += input_len
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, len(self.chunks))
        return start, self.endpoint, kind

862

863

864

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting minimal
       edits only.
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        :return: (start, end, kind) where start and end are the endpoint
            before and after the insertion and kind is 'fulltext' or
            'delta'.
        :raises AssertionError: if the delta index's _source_offset does not
            match self.endpoint before or after the insertion.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No delta was produced: store the whole text.
            kind = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # One byte for the 'f' marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            kind = 'delta'
            enc_length = encode_base128_int(len(delta))
            # One byte for the 'd' marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # Adjacent literals are joined with no separator, so the leading
            # space on the second fragment is required.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, kind

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion (chunk count, endpoint) in self._last so
        the insertion can be rolled back, then appends the chunks and
        advances self.endpoint by their total length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

938

939

940

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Passed through to _GCGraphIndex.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles with extra 'stream' and 'writer'
        attributes set on it.
    """
    def factory(transport):
        # One reference list (the parents) when a graph is stored, else none.
        if graph:
            ref_length = 1
        else:
            ref_length = 0
        graph_index = BTreeBuilder(reference_lists=ref_length,
                                   key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _GCGraphIndex(graph_index, lambda:True, parents=graph,
                              add_callback=graph_index.add_nodes,
                              inconsistency_fatal=inconsistency_fatal)
        access = knit._DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = GroupCompressVersionedFiles(index, access, delta)
        # Keep handles on the object so cleanup_pack_group() can close them.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

970

971

972

def cleanup_pack_group(versioned_files):
    """End the container writer, then close the stream, of versioned_files.

    :param versioned_files: An object carrying 'writer' and 'stream'
        attributes, as set by the factory from make_pack_factory().
    """
    container_writer = versioned_files.writer
    container_writer.end()
    pack_stream = versioned_files.stream
    pack_stream.close()

975

976

977

class GroupCompressVersionedFiles(VersionedFiles):

978

"""A group-compress based VersionedFiles implementation."""

979

980

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    # Collaborators supplied by the caller.
    self._index = index
    self._access = access
    self._delta = delta
    # Per-instance state, all initially empty.
    self._fallback_vfs = []
    self._unadded_refs = {}
    self._group_cache = LRUSizeCache(max_size=50*1024*1024)

993

994

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \\n and contain no
        other \\n's. The last line may either contain no \\n's or a single
        terminating \\n. If the lines list does not meet this constraint
        the add routine may error or may succeed - but you will be unable
        to read the data back accurately. (Checking the lines have been
        split correctly is expensive and extremely unlikely to catch bugs
        so it is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. VERY IMPORTANT: the texts must be those
        returned by add_lines or data corruption can be caused.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent.  The format is
        the SequenceMatcher.get_matching_blocks format.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and an opaque
        representation of the inserted version which can be provided
        back to future add_lines calls in the parent_texts dictionary.
        The opaque representation returned here is always None.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    length = sum(len(line) for line in lines)
    record = ChunkedContentFactory(key, parents, None, lines)
    inserted = self._insert_record_stream([record], random_id=random_id,
                                          nostore_sha=nostore_sha)
    # Exactly one record went in, so the first sha1 yielded is ours.
    sha1 = list(inserted)[0]
    return sha1, length, None

1043

1044

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :return: (sha1, length of text, None).
    :raises errors.BzrBadParameterUnicode: if text is not exactly a str.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    length = len(text)
    record = FulltextContentFactory(key, parents, None, text)
    inserted = self._insert_record_stream([record], random_id=random_id,
                                          nostore_sha=nostore_sha)
    # Exactly one record went in, so the first sha1 yielded is ours.
    sha1 = list(inserted)[0]
    return sha1, length, None

1061

1062

def add_fallback_versioned_files(self, a_versioned_files):
    """Add a source of texts for texts not present in this object.

    The new source is appended, so it is placed after any fallbacks that
    were added earlier.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1068

1069

def annotate(self, key):
    """See VersionedFiles.annotate.

    Builds a fresh annotate.Annotator over self and returns its
    annotate_flat() result for key.
    """
    return annotate.Annotator(self).annotate_flat(key)

1073

1074

def get_annotator(self):
    """Return a new annotate.Annotator bound to this object."""
    annotator = annotate.Annotator(self)
    return annotator

1076

1077

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    :param progress_bar: Unused here.
    :param keys: When None, every key from self.keys() is extracted as a
        fulltext immediately and None is returned.  Otherwise the record
        stream for keys is returned without being consumed.
    """
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)
    all_keys = self.keys()
    for record in self.get_record_stream(all_keys, 'unordered', True):
        # Only the extraction matters; the bytes are thrown away.
        record.get_bytes_as('fulltext')

1085

1086

def _check_add(self, key, lines, random_id, check_content):
    """check that version_id and lines are safe to add.

    :param key: The key tuple; its last element is the version id.
    :param lines: The lines to validate when check_content is true.
    :param random_id: Not consulted here.
    :param check_content: If true, run the unicode and line-shape checks
        on lines.
    :raises errors.InvalidRevisionId: if the version id contains whitespace.
    """
    version_id = key[-1]
    if version_id is not None and osutils.contains_whitespace(version_id):
        raise errors.InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1100

1101

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    :param keys: The keys whose ancestry should be loaded.
    :return: A KnownGraph built from the parent map that the graph index
        found for keys.  Keys reported missing by the index are ignored.
    """
    # The second argument selects which reference list to walk; list 0 is
    # presumably the parents list -- TODO confirm against the index API.
    parent_map, missing_keys = self._index._graph_index.find_ancestry(keys,
                                                                      0)
    kg = _mod_graph.KnownGraph(parent_map)
    return kg

1107

1108

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined map is wanted; the per-source breakdown is dropped.
    map_and_sources = self._get_parent_map_with_sources(keys)
    return map_and_sources[0]

1116

1117

def _get_parent_map_with_sources(self, keys):

1118

"""Get a map of the parents of keys.

1119

1120

:param keys: The keys to look up parents for.

1121

:return: A tuple. The first element is a mapping from keys to parents.

1122

Absent keys are absent from the mapping. The second element is a

1123

list with the locations each key was found in. The first element

1124

is the in-this-knit parents, the second the first fallback source,

1125

and so on.

1126

"""

1127

result = {}

1128

sources = [self._index] + self._fallback_vfs

1129

source_results = []

1130

missing = set(keys)

1131

for source in sources:

1132

if not missing:

1133

break

1134

new_result = source.get_parent_map(missing)

1135

source_results.append(new_result)

1136

result.update(new_result)

1137

missing.difference_update(set(new_result))

1138

return result, source_results

1139

1140

def _get_block(self, index_memo):

1141

read_memo = index_memo[0:3]

1142

# get the group:

1143

try:

1144

block = self._group_cache[read_memo]

1145

except KeyError:

1146

# read the group

1147

zdata = self._access.get_raw_records([read_memo]).next()

1148

# decompress - whole thing - this is not a bug, as it

1149

# permits caching. We might want to store the partially

1150

# decompresed group and decompress object, so that recent

1151

# texts are not penalised by big groups.

1152

block = GroupCompressBlock.from_bytes(zdata)

1153

self._group_cache[read_memo] = block

1154

# cheapo debugging:

1155

# print len(zdata), len(plain)

1156

# parse - requires split_lines, better to have byte offsets

1157

# here (but not by much - we only split the region for the

1158

# recipe, and we often want to end up with lines anyway.

1159

return block

1160

1161

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or a index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    no_missing_parents = frozenset()
    return no_missing_parents

1170

1171

def get_record_stream(self, keys, ordering, include_delta_closure):

1172

"""Get a stream of records for keys.

1173

1174

:param keys: The keys to include.

1175

:param ordering: Either 'unordered' or 'topological'. A topologically

1176

sorted stream has compression parents strictly before their

1177

children.

1178

:param include_delta_closure: If True then the closure across any

1179

compression parents will be included (in the opaque data).

1180

:return: An iterator of ContentFactory objects, each of which is only

1181

valid until the iterator is advanced.

1182

"""

1183

# keys might be a generator

1184

orig_keys = list(keys)

1185

keys = set(keys)

1186

if not keys:

1187

return

1188

if (not self._index.has_graph

1189

and ordering in ('topological', 'groupcompress')):

1190

# Cannot topological order when no graph has been stored.

1191

# but we allow 'as-requested' or 'unordered'

1192

ordering = 'unordered'

1193

1194

remaining_keys = keys

1195

while True:

1196

try:

1197

keys = set(remaining_keys)

1198

for content_factory in self._get_remaining_record_stream(keys,

1199

orig_keys, ordering, include_delta_closure):

1200

remaining_keys.discard(content_factory.key)

1201

yield content_factory

1202

return

1203

except errors.RetryWithNewPacks, e:

1204

self._access.reload_or_raise(e)

1205

1206

def _find_from_fallback(self, missing):

1207

"""Find whatever keys you can from the fallbacks.

1208

1209

:param missing: A set of missing keys. This set will be mutated as keys

1210

are found from a fallback_vfs

1211

:return: (parent_map, key_to_source_map, source_results)

1212

parent_map the overall key => parent_keys

1213

key_to_source_map a dict from {key: source}

1214

source_results a list of (source: keys)

1215

"""

1216

parent_map = {}

1217

key_to_source_map = {}

1218

source_results = []

1219

for source in self._fallback_vfs:

1220

if not missing:

1221

break

1222

source_parents = source.get_parent_map(missing)

1223

parent_map.update(source_parents)

1224

source_parents = list(source_parents)

1225

source_results.append((source, source_parents))

1226

key_to_source_map.update((key, source) for key in source_parents)

1227

missing.difference_update(source_parents)

1228

return parent_map, key_to_source_map, source_results

1229

1230

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The returned objects should be in the order defined by 'ordering',
    which can weave between different sources.  Keys that do not appear
    in key_to_source_map are attributed to self.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering != 'topological':
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        ordered_keys = sort_gc_optimal(parent_map)
    else:
        ordered_keys = topo_sort(parent_map)
    # Now group by source: consecutive keys from one source share a list.
    grouped = []
    previous_owner = None
    for key in ordered_keys:
        owner = key_to_source_map.get(key, self)
        if owner is not previous_owner:
            grouped.append((owner, []))
            previous_owner = owner
        grouped[-1][1].append(key)
    return grouped

1258

1259

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1260

key_to_source_map):

1261

source_keys = []

1262

current_source = None

1263

for key in orig_keys:

1264

if key in locations or key in unadded_keys:

1265

source = self

1266

elif key in key_to_source_map:

1267

source = key_to_source_map[key]

1268

else: # absent

1269

continue

1270

if source is not current_source:

1271

source_keys.append((source, []))

1272

current_source = source

1273

source_keys[-1][1].append(key)

1274

return source_keys

1275

1276

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1277

source_result):

1278

def get_group(key):

1279

# This is the group the bytes are stored in, followed by the

1280

# location in the group

1281

return locations[key][0]

1282

present_keys = sorted(locations.iterkeys(), key=get_group)

1283

# We don't have an ordering for keys in the in-memory object, but

1284

# lets process the in-memory ones first.

1285

present_keys = list(unadded_keys) + present_keys

1286

# Now grab all of the ones from other sources

1287

source_keys = [(self, present_keys)]

1288

source_keys.extend(source_result)

1289

return source_keys

1290

1291

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    Keys found neither in this object nor in any fallback are yielded
    first, as AbsentContentFactory records.  The remaining keys are then
    yielded source by source in the order chosen for 'ordering'.

    :param keys: The keys to include.
    :param orig_keys: The keys in the order requested; only consulted when
        ordering is 'as-requested'.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    # _find_from_fallback removes from 'missing' every key a fallback has,
    # so afterwards 'missing' holds only the truly absent keys.
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = dict((key, details[2]) for key, details in
            locations.iteritems())
        for key in unadded_keys:
            parent_map[key] = self._unadded_refs[key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    elif ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    for key in missing:
        yield AbsentContentFactory(key)
    # 'manager' batches consecutive keys that live in the same block;
    # 'last_read_memo' identifies the block it was created for.
    manager = None
    last_read_memo = None
    # TODO: This works fairly well at batching up existing groups into a
    #       streamable format, and possibly allowing for taking one big
    #       group and splitting it when it isn't fully utilized.
    #       However, it doesn't allow us to find under-utilized groups and
    #       combine them into a bigger group on the fly.
    #       (Consider the issue with how chk_map inserts texts
    #       one-at-a-time.) This could be done at insert_record_stream()
    #       time, but it probably would decrease the number of
    #       bytes-on-the-wire for fetch.
    for source, keys in source_keys:
        if source is self:
            for key in keys:
                if key in self._unadded_refs:
                    # Not yet written out: flush whatever has been batched
                    # so far, then extract straight from the compressor.
                    if manager is not None:
                        for factory in manager.get_record_stream():
                            yield factory
                        last_read_memo = manager = None
                    bytes, sha1 = self._compressor.extract(key)
                    parents = self._unadded_refs[key]
                    yield FulltextContentFactory(key, parents, sha1, bytes)
                else:
                    index_memo, _, parents, (method, _) = locations[key]
                    read_memo = index_memo[0:3]
                    if last_read_memo != read_memo:
                        # We are starting a new block. If we have a
                        # manager, we have found everything that fits for
                        # now, so yield records
                        if manager is not None:
                            for factory in manager.get_record_stream():
                                yield factory
                        # Now start a new manager
                        block = self._get_block(index_memo)
                        manager = _LazyGroupContentManager(block)
                        last_read_memo = read_memo
                    start, end = index_memo[3:5]
                    manager.add_factory(key, parents, start, end)
        else:
            # Keys held by a fallback: flush our own batch first so the
            # overall order is kept, then delegate to the fallback.
            if manager is not None:
                for factory in manager.get_record_stream():
                    yield factory
                last_read_memo = manager = None
            for record in source.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                yield record
    # Flush the final batch, if any.
    if manager is not None:
        for factory in manager.get_record_stream():
            yield factory

1380

1381

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping each present key to its sha1.  Records that
        carry no sha1 have it computed from their fulltext; absent
        records are left out.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # None is a singleton: test identity rather than equality.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1392

1393

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserted_sha1s = self._insert_record_stream(stream, random_id=False)
    # The helper is a generator: it must be drained for the inserts to
    # happen, but the sha1s it yields are not wanted here.
    for _unused_sha1 in inserted_sha1s:
        pass

1406

1407

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated, and the
    last group is only written out once iteration has finished.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to self._index.add_records.  When
        true, a key that appears a second time in the stream is skipped
        with a trace note.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records.
    :raises errors.RevisionNotPresent: if the stream contains an absent
        record.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    # Adapters are created on demand and reused for the rest of the stream.
    adapters = {}
    def get_adapter(adapter_key):
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    # (key, 'start end', refs) for texts compressed but not yet indexed.
    keys_to_add = []
    def flush():
        # Write the current group out, index everything that was in it,
        # and start over with a fresh compressor.
        bytes = self._compressor.flush().to_bytes()
        index, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    # State for raw block reuse: the manager of the last block copied and
    # where that block was written.
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                insert_manager._check_rebuild_block()
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                # A '-ref' record points into the block copied most
                # recently, so a block must already have been seen.
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        # The prefix is the first key element of multi-element keys; 'soft'
        # is set while consecutive records share a prefix.
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         type) = self._compressor.compress(record.key,
                                           bytes, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the full group, write that group,
            # and compress the text again as the first entry of a new one.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point,
             type) = self._compressor.compress(record.key, bytes,
                                               record.sha1)
        if record.key[-1] is None:
            # No id was supplied: derive the last key element from the sha1.
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
                           (record.parents,)))
    if len(keys_to_add):
        flush()
    self._compressor = None

1545

1546

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have \\n
       terminators.
     * Lines are returned in arbitrary order.

    :return: An iterator over (line, key).
    :raises errors.RevisionNotPresent: if one of the keys is absent.
    """
    keys = set(keys)
    total = len(keys)
    show_progress = pb is not None
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    records = self.get_record_stream(keys, 'unordered', True)
    for key_idx, record in enumerate(records):
        # XXX: todo - optimise to use less than full texts.
        key = record.key
        if show_progress:
            pb.update('Walking content', key_idx, total)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(key, self)
        for line in osutils.split_lines(record.get_bytes_as('fulltext')):
            yield line, key
    if show_progress:
        pb.update('Walking content', total, total)

1585

1586

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of the index and of every fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1595

1596

1597

class _GCGraphIndex(object):

1598

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1599

1600

def __init__(self, graph_index, is_locked, parents=True,

1601

add_callback=None, track_external_parent_refs=False,

1602

inconsistency_fatal=True):

1603

"""Construct a _GCGraphIndex on a graph_index.

1604

1605

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1606

:param is_locked: A callback, returns True if the index is locked and

1607

thus usable.

1608

:param parents: If True, record knits parents, if not do not record

1609

parents.

1610

:param add_callback: If not None, allow additions to the index and call

1611

this callback with a list of added GraphIndex nodes:

1612

[(node, value, node_refs), ...]

1613

:param track_external_parent_refs: As keys are added, keep track of the

1614

keys they reference, so that we can query get_missing_parents(),

1615

etc.

1616

:param inconsistency_fatal: When asked to add records that are already

1617

present, and the details are inconsistent with the existing

1618

record, raise an exception instead of warning (and skipping the

1619

record).

1620

"""

1621

self._add_callback = add_callback

1622

self._graph_index = graph_index

1623

self._parents = parents

1624

self.has_graph = parents

1625

self._is_locked = is_locked

1626

self._inconsistency_fatal = inconsistency_fatal

1627

if track_external_parent_refs:

1628

self._key_dependencies = knit._KeyRefs()

1629

else:

1630

self._key_dependencies = None

1631

1632

def add_records(self, records, random_id=False):

1633

"""Add multiple records to the index.

1634

1635

This function does not insert data into the Immutable GraphIndex

1636

backing the KnitGraphIndex, instead it prepares data for insertion by

1637

the caller and checks that it is safe to insert then calls

1638

self._add_callback with the prepared GraphIndex nodes.

1639

1640

:param records: a list of tuples:

1641

(key, options, access_memo, parents).

1642

:param random_id: If True the ids being added were randomly generated

1643

and no check for existence will be performed.

1644

"""

1645

if not self._add_callback:

1646

raise errors.ReadOnlyError(self)

1647

# we hope there are no repositories with inconsistent parentage

1648

# anymore.

1649

1650

changed = False

1651

keys = {}

1652

for (key, value, refs) in records:

1653

if not self._parents:

1654

if refs:

1655

for ref in refs:

1656

if ref:

1657

raise errors.KnitCorrupt(self,

1658

"attempt to add node with parents "

1659

"in parentless index.")

1660

refs = ()

1661

changed = True

1662

keys[key] = (value, refs)

1663

# check for dups

1664

if not random_id:

1665

present_nodes = self._get_entries(keys)

1666

for (index, key, value, node_refs) in present_nodes:

1667

if node_refs != keys[key][1]:

1668

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1669

if self._inconsistency_fatal:

1670

raise errors.KnitCorrupt(self, "inconsistent details"

1671

" in add_records: %s" %

1672

details)

1673

else:

1674

trace.warning("inconsistent details in skipped"

1675

" record: %s", details)

1676

del keys[key]

1677

changed = True

1678

if changed:

1679

result = []

1680

if self._parents:

1681

for key, (value, node_refs) in keys.iteritems():

1682

result.append((key, value, node_refs))

1683

else:

1684

for key, (value, node_refs) in keys.iteritems():

1685

result.append((key, value))

1686

records = result

1687

key_dependencies = self._key_dependencies

1688

if key_dependencies is not None and self._parents:

1689

for key, value, refs in records:

1690

parents = refs[0]

1691

key_dependencies.add_references(key, parents)

1692

self._add_callback(records)

1693

1694

def _check_read(self):

1695

"""Raise an exception if reads are not permitted."""

1696

if not self._is_locked():

1697

raise errors.ObjectNotLocked(self)

1698

1699

def _check_write_ok(self):

1700

"""Raise an exception if writes are not permitted."""

1701

if not self._is_locked():

1702

raise errors.ObjectNotLocked(self)

1703

1704

def _get_entries(self, keys, check_present=False):

1705

"""Get the entries for keys.

1706

1707

Note: Callers are responsible for checking that the index is locked

1708

before calling this method.

1709

1710

:param keys: An iterable of index key tuples.

1711

"""

1712

keys = set(keys)

1713

found_keys = set()

1714

if self._parents:

1715

for node in self._graph_index.iter_entries(keys):

1716

yield node

1717

found_keys.add(node[1])

1718

else:

1719

# adapt parentless index to the rest of the code.

1720

for node in self._graph_index.iter_entries(keys):

1721

yield node[0], node[1], node[2], ()

1722

found_keys.add(node[1])

1723

if check_present:

1724

missing_keys = keys.difference(found_keys)

1725

if missing_keys:

1726

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1727

1728

def get_parent_map(self, keys):

1729

"""Get a map of the parents of keys.

1730

1731

:param keys: The keys to look up parents for.

1732

:return: A mapping from keys to parents. Absent keys are absent from

1733

the mapping.

1734

"""

1735

self._check_read()

1736

nodes = self._get_entries(keys)

1737

result = {}

1738

if self._parents:

1739

for node in nodes:

1740

result[node[1]] = node[3][0]

1741

else:

1742

for node in nodes:

1743

result[node[1]] = None

1744

return result

1745

1746

def get_missing_parents(self):

1747

"""Return the keys of missing parents."""

1748

# Copied from _KnitGraphIndex.get_missing_parents

1749

# We may have false positives, so filter those out.

1750

self._key_dependencies.add_keys(

1751

self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))

1752

return frozenset(self._key_dependencies.get_unsatisfied_refs())

1753

1754

def get_build_details(self, keys):

1755

"""Get the various build details for keys.

1756

1757

Ghosts are omitted from the result.

1758

1759

:param keys: An iterable of keys.

1760

:return: A dict of key:

1761

(index_memo, compression_parent, parents, record_details).

1762

index_memo

1763

opaque structure to pass to read_records to extract the raw

1764

data

1765

compression_parent

1766

Content that this record is built upon, may be None

1767

parents

1768

Logical parents of this node

1769

record_details

1770

extra information about the content which needs to be passed to

1771

Factory.parse_record

1772

"""

1773

self._check_read()

1774

result = {}

1775

entries = self._get_entries(keys)

1776

for entry in entries:

1777

key = entry[1]

1778

if not self._parents:

1779

parents = None

1780

else:

1781

parents = entry[3][0]

1782

method = 'group'

1783

result[key] = (self._node_to_position(entry),

1784

None, parents, (method, None))

1785

return result

1786

1787

def keys(self):

1788

"""Get all the keys in the collection.

1789

1790

The keys are not ordered.

1791

"""

1792

self._check_read()

1793

return [node[1] for node in self._graph_index.iter_all_entries()]

1794

1795

def _node_to_position(self, node):

1796

"""Convert an index value to position details."""

1797

bits = node[2].split(' ')

1798

# It would be nice not to read the entire gzip.

1799

start = int(bits[0])

1800

stop = int(bits[1])

1801

basis_end = int(bits[2])

1802

delta_end = int(bits[3])

1803

return node[0], start, stop, basis_end, delta_end

1804

1805

def scan_unvalidated_index(self, graph_index):

1806

"""Inform this _GCGraphIndex that there is an unvalidated index.

1807

1808

This allows this _GCGraphIndex to keep track of any missing

1809

compression parents we may want to have filled in to make those

1810

indices valid.

1811

1812

:param graph_index: A GraphIndex

1813

"""

1814

if self._key_dependencies is not None:

1815

# Add parent refs from graph_index (and discard parent refs that

1816

# the graph_index has).

1817

add_refs = self._key_dependencies.add_references

1818

for node in graph_index.iter_all_entries():

1819

add_refs(node[1], node[3][0])

1820

1821

1822

1823

# The pure-Python module supplies the baseline definitions of these names.
from bzrlib._groupcompress_py import (
    apply_delta,
    apply_delta_to_source,
    encode_base128_int,
    decode_base128_int,
    decode_copy_instruction,
    LinesDeltaIndex,
    )
try:
    # When the _groupcompress_pyx module can be imported, its versions of
    # the shared names replace the pure-Python ones bound above.
    from bzrlib._groupcompress_pyx import (
        apply_delta,
        apply_delta_to_source,
        DeltaIndex,
        encode_base128_int,
        decode_base128_int,
        )
except ImportError:
    GroupCompressor = PythonGroupCompressor
else:
    GroupCompressor = PyrexGroupCompressor

1842

Older »