~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Robert Collins
Date: 2009-07-07 04:32:13 UTC
mto: This revision was merged to the branch mainline in revision 4524.
Revision ID: robertc@robertcollins.net-20090707043213-4hjjhgr40iq7gk2d

More informative assertions in xml serialisation.

files added:
bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_tags.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/btree_index.py

bzrlib/bundle/serializer/v4.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/index.py

bzrlib/inventory_delta.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/switch.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_with_stat.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/util/bencode.py

bzrlib/util/simplemapi.py

bzrlib/version_info_formats/format_custom.py

bzrlib/views.py

bzrlib/xml5.py

bzrlib/xml6.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-format4.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/directory-fingerprints.txt

doc/developers/ec2.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/indices.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revision-properties.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.ru.txt

doc/index.txt

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

tools/check-newsbugs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/time_graph.py

tools/win32/build_release.py

tools/win32/run_script.py

files removed:
bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/tests/test_bencode.py

bzrlib/xml6.py

doc/README.1st

doc/developers/performance-contributing.txt

doc/developers/scratch.txt

files renamed:
bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/help_topics.py => bzrlib/help_topics/__init__.py

doc/configuration.txt => bzrlib/help_topics/en/configuration.txt

bzrlib/plugins/launchpad/lp_indirect.py => bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_lp_indirect.py => bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_send.py

bzrlib/tests/HttpServer.py => bzrlib/tests/http_server.py

bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/test_revisionnamespaces.py => bzrlib/tests/test_revisionspec.py

bzrlib/transport/ftp.py => bzrlib/transport/ftp/__init__.py

bzrlib/util/bencode.py => bzrlib/util/_bencode_py.py

bzrlib/xml5.py => bzrlib/xml8.py

doc/developers/HACKING => doc/en/developer-guide/HACKING.txt

doc/centralized_workflow.txt => doc/en/tutorials/centralized_workflow.txt

doc/tutorial.txt => doc/en/tutorials/tutorial.txt

doc/bug_trackers.txt => doc/en/user-guide/bug_trackers.txt

doc/http_smart_server.txt => doc/en/user-guide/http_smart_server.txt

doc/index.txt => doc/en/user-guide/index.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/server.txt => doc/en/user-guide/server.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/shared_repository_layouts.txt => doc/en/user-guide/shared_repository_layouts.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

doc/version_info.txt => doc/en/user-guide/version_info.txt

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

bzr.ico

bzrlib/__init__.py

bzrlib/_knit_load_data_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib/newinventory.py

contrib/pwclient.full

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers/api-versioning.txt

doc/developers/bundles.txt

doc/developers/container-format.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/merge-scaling.txt

doc/developers/performance-roadmap.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/profiling.txt

profile_imports.py

setup.py

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

trace,

)

from bzrlib.graph import Graph

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# Selects lzma instead of zlib when blocks are compressed. The leading
# ``False and`` makes this always False, whether or not pylzma imported.
_USE_LZMA = False and (pylzma is not None)

# The sha1 of the empty string, i.e. osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map the way groupcompress wants them.

    Entries are bucketed by key prefix: the first element of a multi-element
    key, or '' when the key is a plain string or has a single element.
    Buckets are emitted in sorted prefix order and, inside each bucket, the
    topo_sort() result is reversed.

    :param parent_map: A dict mapping each key to its parent keys.
    :return: A sorted-list of keys
    """
    grouped = {}
    for entry in parent_map.iteritems():
        key = entry[0]
        if not isinstance(key, str) and len(key) != 1:
            prefix = key[0]
        else:
            prefix = ''
        # Keep the whole (key, parents) pair; topo_sort needs the parents.
        grouped.setdefault(prefix, []).append(entry)
    ordered_keys = []
    for prefix in sorted(grouped):
        ordered_keys.extend(reversed(topo_sort(grouped[prefix])))
    return ordered_keys

# The max zlib window size is 32kB, so if we set the 'max_size' output of the
# decompressor to the requested bytes + 32kB, then we should be guaranteed to
# get at least num_bytes coming out.
# GroupCompressBlock._ensure_content adds this slack to every partial
# decompression request.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The serialised form produced by to_bytes() and read by from_bytes() is:
    a 6 byte header naming the compressor, the compressed length and the
    uncompressed length as base10 lines, then the compressed content.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        # map by key? or just order in file?
        # 'zlib' or 'lzma' once known; says how _z_content is compressed.
        self._compressor_name = None
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None
        self._content_chunks = None

    def __len__(self):
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: If more bytes are requested than the block
            holds, if there is nothing to decompress, or if the compressor
            is unknown.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._content_chunks is not None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The complete serialised block.
        :param pos: Offset in bytes of the first length line.
        :raises ValueError: If a length line is missing or not an integer.
        :raises AssertionError: If the compressed length disagrees with the
            number of bytes actually present.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: The output of a previous to_bytes() call.
        :return: A new block holding the still-compressed content.
        :raises ValueError: If bytes does not start with a known header.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content
        :param start: Offset of the record's first byte in the content.
        :param end: Offset just past the record's last byte.
        :param sha1: Currently unused.
            TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: If the record does not start with 'f' or 'd', or
            if its embedded length disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c != 'f' and c != 'd':
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            text = self._content[content_start:end]
        else:
            text = apply_delta_to_source(self._content, content_start, end)
        return text

    def set_chunked_content(self, content_chunks, length):
        """Set the content of this block to the given chunks.

        :param content_chunks: A list of strings making up the content.
        :param length: The total length of the chunks.
        """
        # If we have lots of short lines, it may be more efficient to join
        # the content ahead of time. If the content is <10MiB, we don't really
        # care about the extra memory consumption, so we can just pack it and
        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
        # mysql, which is below the noise margin
        self._content_length = length
        self._content_chunks = content_chunks
        self._content = None
        self._z_content = None

    def set_content(self, content):
        """Set the content of this block."""
        self._content_length = len(content)
        self._content = content
        self._z_content = None

    def _create_z_content_using_lzma(self):
        """Fill in _z_content by lzma-compressing the content."""
        if self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            raise AssertionError('Nothing to compress')
        self._z_content = pylzma.compress(self._content)
        self._z_content_length = len(self._z_content)
        # Remember how _z_content was produced so to_bytes() labels it right.
        self._compressor_name = 'lzma'

    def _create_z_content_from_chunks(self):
        """Fill in _z_content by zlib-compressing the chunks one by one."""
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
        # A real list is needed here, because the flush output is appended.
        compressed_chunks = [compressor.compress(chunk)
                             for chunk in self._content_chunks]
        compressed_chunks.append(compressor.flush())
        self._z_content = ''.join(compressed_chunks)
        self._z_content_length = len(self._z_content)
        self._compressor_name = 'zlib'

    def _create_z_content(self):
        """Make sure _z_content is populated, compressing if necessary."""
        if self._z_content is not None:
            return
        if _USE_LZMA:
            self._create_z_content_using_lzma()
            return
        if self._content_chunks is not None:
            self._create_z_content_from_chunks()
            return
        self._z_content = zlib.compress(self._content)
        self._z_content_length = len(self._z_content)
        self._compressor_name = 'zlib'

    def to_bytes(self):
        """Encode the information into a byte stream."""
        self._create_z_content()
        # The header has to describe how _z_content was really compressed.
        # That can differ from _USE_LZMA when the compressed bytes were
        # inherited from from_bytes().
        compressor_name = self._compressor_name
        if compressor_name is None:
            # _z_content was supplied without naming its compressor; fall
            # back on the module-wide choice.
            if _USE_LZMA:
                compressor_name = 'lzma'
            else:
                compressor_name = 'zlib'
        if compressor_name == 'lzma':
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                 ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, choose whether
            you want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: If the content is not a well-formed sequence of
            records.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                if include_text:
                    text = self._content[pos:pos+content_len]
                    result.append(('f', content_len, text))
                else:
                    result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        if include_text:
                            text = self._content[offset:offset+length]
                            delta_info.append(('c', offset, length, text))
                        else:
                            delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                        ' extraction resulted in %d bytes'
                        % (decomp_len, measured_len))
            pos += content_len
        return result

403

404

405

class _LazyGroupCompressFactory(object):

406

"""Yield content from a GroupCompressBlock on demand."""

407

408

def __init__(self, key, parents, manager, start, end, first):

409

"""Create a _LazyGroupCompressFactory

410

411

:param key: The key of just this record

412

:param parents: The parents of this key (possibly None)

413

:param gc_block: A GroupCompressBlock object

414

:param start: Offset of the first byte for this record in the

415

uncompressd content

416

:param end: Offset of the byte just after the end of this record

417

(ie, bytes = content[start:end])

418

:param first: Is this the first Factory for the given block?

419

"""

420

self.key = key

421

self.parents = parents

422

self.sha1 = None

423

# Note: This attribute coupled with Manager._factories creates a

424

# reference cycle. Perhaps we would rather use a weakref(), or

425

# find an appropriate time to release the ref. After the first

426

# get_bytes_as call? After Manager.get_record_stream() returns

427

# the object?

428

self._manager = manager

429

self._bytes = None

430

self.storage_kind = 'groupcompress-block'

431

if not first:

432

self.storage_kind = 'groupcompress-block-ref'

433

self._first = first

434

self._start = start

435

self._end = end

436

437

def __repr__(self):

438

return '%s(%s, first=%s)' % (self.__class__.__name__,

439

self.key, self._first)

440

441

def get_bytes_as(self, storage_kind):

442

if storage_kind == self.storage_kind:

443

if self._first:

444

# wire bytes, something...

445

return self._manager._wire_bytes()

446

else:

447

return ''

448

if storage_kind in ('fulltext', 'chunked'):

449

if self._bytes is None:

450

# Grab and cache the raw bytes for this entry

451

# and break the ref-cycle with _manager since we don't need it

452

# anymore

453

self._manager._prepare_for_extract()

454

block = self._manager._block

455

self._bytes = block.extract(self.key, self._start, self._end)

456

# There are code paths that first extract as fulltext, and then

457

# extract as storage_kind (smart fetch). So we don't break the

458

# refcycle here, but instead in manager.get_record_stream()

459

# self._manager = None

460

if storage_kind == 'fulltext':

461

return self._bytes

462

else:

463

return [self._bytes]

464

raise errors.UnavailableRepresentation(self.key, storage_kind,

465

self.storage_kind)

466

467

468

class _LazyGroupContentManager(object):

469

"""This manages a group of _LazyGroupCompressFactory objects."""

470

471

def __init__(self, block):

472

self._block = block

473

# We need to preserve the ordering

474

self._factories = []

475

self._last_byte = 0

476

477

def add_factory(self, key, parents, start, end):

478

if not self._factories:

479

first = True

480

else:

481

first = False

482

# Note that this creates a reference cycle....

483

factory = _LazyGroupCompressFactory(key, parents, self,

484

start, end, first=first)

485

# max() works here, but as a function call, doing a compare seems to be

486

# significantly faster, timeit says 250ms for max() and 100ms for the

487

# comparison

488

if end > self._last_byte:

489

self._last_byte = end

490

self._factories.append(factory)

491

492

def get_record_stream(self):

493

"""Get a record for all keys added so far."""

494

for factory in self._factories:

495

yield factory

496

# Break the ref-cycle

497

factory._bytes = None

498

factory._manager = None

499

# TODO: Consider setting self._factories = None after the above loop,

500

# as it will break the reference cycle

501

502

def _trim_block(self, last_byte):

503

"""Create a new GroupCompressBlock, with just some of the content."""

504

# None of the factories need to be adjusted, because the content is

505

# located in an identical place. Just that some of the unreferenced

506

# trailing bytes are stripped

507

trace.mutter('stripping trailing bytes from groupcompress block'

508

' %d => %d', self._block._content_length, last_byte)

509

new_block = GroupCompressBlock()

510

self._block._ensure_content(last_byte)

511

new_block.set_content(self._block._content[:last_byte])

512

self._block = new_block

513

514

def _rebuild_block(self):

515

"""Create a new GroupCompressBlock with only the referenced texts."""

516

compressor = GroupCompressor()

517

tstart = time.time()

518

old_length = self._block._content_length

519

end_point = 0

520

for factory in self._factories:

521

bytes = factory.get_bytes_as('fulltext')

522

(found_sha1, start_point, end_point,

523

type) = compressor.compress(factory.key, bytes, factory.sha1)

524

# Now update this factory with the new offsets, etc

525

factory.sha1 = found_sha1

526

factory._start = start_point

527

factory._end = end_point

528

self._last_byte = end_point

529

new_block = compressor.flush()

530

# TODO: Should we check that new_block really *is* smaller than the old

531

# block? It seems hard to come up with a method that it would

532

# expand, since we do full compression again. Perhaps based on a

533

# request that ends up poorly ordered?

534

delta = time.time() - tstart

535

self._block = new_block

536

trace.mutter('creating new compressed block on-the-fly in %.3fs'

537

' %d bytes => %d bytes', delta, old_length,

538

self._block._content_length)

539

540

def _prepare_for_extract(self):

541

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

542

# We expect that if one child is going to fulltext, all will be. This

543

# helps prevent all of them from extracting a small amount at a time.

544

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

545

# time (self._block._content) is a little expensive.

546

self._block._ensure_content(self._last_byte)

547

548

def _check_rebuild_block(self):

549

"""Check to see if our block should be repacked."""

550

total_bytes_used = 0

551

last_byte_used = 0

552

for factory in self._factories:

553

total_bytes_used += factory._end - factory._start

554

last_byte_used = max(last_byte_used, factory._end)

555

# If we are using most of the bytes from the block, we have nothing

556

# else to check (currently more that 1/2)

557

if total_bytes_used * 2 >= self._block._content_length:

558

return

559

# Can we just strip off the trailing bytes? If we are going to be

560

# transmitting more than 50% of the front of the content, go ahead

561

if total_bytes_used * 2 > last_byte_used:

562

self._trim_block(last_byte_used)

563

return

564

565

# We are using a small amount of the data, and it isn't just packed

566

# nicely at the front, so rebuild the content.

567

# Note: This would be *nicer* as a strip-data-from-group, rather than

568

# building it up again from scratch

569

# It might be reasonable to consider the fulltext sizes for

570

# different bits when deciding this, too. As you may have a small

571

# fulltext, and a trivial delta, and you are just trading around

572

# for another fulltext. If we do a simple 'prune' you may end up

573

# expanding many deltas into fulltexts, as well.

574

# If we build a cheap enough 'strip', then we could try a strip,

575

# if that expands the content, we then rebuild.

576

self._rebuild_block()

577

578

def _wire_bytes(self):

579

"""Return a byte stream suitable for transmitting over the wire."""

580

self._check_rebuild_block()

581

# The outer block starts with:

582

# 'groupcompress-block\n'

583

# <length of compressed key info>\n

584

# <length of uncompressed info>\n

585

# <length of gc block>\n

586

# <header bytes>

587

# <gc-block>

588

lines = ['groupcompress-block\n']

589

# The minimal info we need is the key, the start offset, and the

590

# parents. The length and type are encoded in the record itself.

591

# However, passing in the other bits makes it easier. The list of

592

# keys, and the start offset, the length

593

# 1 line key

594

# 1 line with parents, '' for ()

595

# 1 line for start offset

596

# 1 line for end byte

597

header_lines = []

598

for factory in self._factories:

599

key_bytes = '\x00'.join(factory.key)

600

parents = factory.parents

601

if parents is None:

602

parent_bytes = 'None:'

603

else:

604

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

605

record_header = '%s\n%s\n%d\n%d\n' % (

606

key_bytes, parent_bytes, factory._start, factory._end)

607

header_lines.append(record_header)

608

# TODO: Can we break the refcycle at this point and set

609

# factory._manager = None?

610

header_bytes = ''.join(header_lines)

611

del header_lines

612

header_bytes_len = len(header_bytes)

613

z_header_bytes = zlib.compress(header_bytes)

614

del header_bytes

615

z_header_bytes_len = len(z_header_bytes)

616

block_bytes = self._block.to_bytes()

617

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

618

len(block_bytes)))

619

lines.append(z_header_bytes)

620

lines.append(block_bytes)

621

del z_header_bytes, block_bytes

622

return ''.join(lines)

623

624

@classmethod

625

def from_bytes(cls, bytes):

626

# TODO: This does extra string copying, probably better to do it a

627

# different way

628

(storage_kind, z_header_len, header_len,

629

block_len, rest) = bytes.split('\n', 4)

630

del bytes

631

if storage_kind != 'groupcompress-block':

632

raise ValueError('Unknown storage kind: %s' % (storage_kind,))

633

z_header_len = int(z_header_len)

634

if len(rest) < z_header_len:

635

raise ValueError('Compressed header len shorter than all bytes')

636

z_header = rest[:z_header_len]

637

header_len = int(header_len)

638

header = zlib.decompress(z_header)

639

if len(header) != header_len:

640

raise ValueError('invalid length for decompressed bytes')

641

del z_header

642

block_len = int(block_len)

643

if len(rest) != z_header_len + block_len:

644

raise ValueError('Invalid length for block')

645

block_bytes = rest[z_header_len:]

646

del rest

647

# So now we have a valid GCB, we just need to parse the factories that

648

# were sent to us

649

header_lines = header.split('\n')

650

del header

651

last = header_lines.pop()

652

if last != '':

653

raise ValueError('header lines did not end with a trailing'

654

' newline')

655

if len(header_lines) % 4 != 0:

656

raise ValueError('The header was not an even multiple of 4 lines')

657

block = GroupCompressBlock.from_bytes(block_bytes)

658

del block_bytes

659

result = cls(block)

660

for start in xrange(0, len(header_lines), 4):

661

# intern()?

662

key = tuple(header_lines[start].split('\x00'))

663

parents_line = header_lines[start+1]

664

if parents_line == 'None:':

665

parents = None

666

else:

667

parents = tuple([tuple(segment.split('\x00'))

668

for segment in parents_line.split('\t')

669

if segment])

670

start_offset = int(header_lines[start+2])

671

end_offset = int(header_lines[start+3])

672

result.add_factory(key, parents, start_offset, end_offset)

673

return result

674

675

676

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a 'groupcompress-block' network payload into a record stream.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised block, as accepted by
        _LazyGroupContentManager.from_bytes.
    :param line_end: Not used by this function.
    :return: The record stream of the manager parsed from bytes.
    :raises ValueError: If storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

681

682

683

class _CommonGroupCompressor(object):

684

685

def __init__(self):
    """Start with an empty compressor: no chunks, no labels, a fresh block."""
    self._block = GroupCompressBlock()
    self._delta_index = None # Set by the children
    self._last = None
    # Everything below starts out empty / zero.
    self.chunks = []
    self.labels_deltas = {}
    self.endpoint = 0
    self.input_bytes = 0

694

695

def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):

696

"""Compress lines with label key.

697

698

:param key: A key tuple. It is stored in the output

699

for identification of the text during decompression. If the last

700

element is 'None' it is replaced with the sha1 of the text -

701

e.g. sha1:xxxxxxx.

702

:param bytes: The bytes to be compressed

703

:param expected_sha: If non-None, the sha the lines are believed to

704

have. During compression the sha is calculated; a mismatch will

705

cause an error.

706

:param nostore_sha: If the computed sha1 sum matches, we will raise

707

ExistingContent rather than adding the text.

708

:param soft: Do a 'soft' compression. This means that we require larger

709

ranges to match to be considered for a copy command.

710

711

:return: The sha1 of lines, the start and end offsets in the delta, and

712

the type ('fulltext' or 'delta').

713

714

:seealso VersionedFiles.add_lines:

715

"""

716

if not bytes: # empty, like a dir entry, etc

717

if nostore_sha == _null_sha1:

718

raise errors.ExistingContent()

719

return _null_sha1, 0, 0, 'fulltext'

720

# we assume someone knew what they were doing when they passed it in

721

if expected_sha is not None:

722

sha1 = expected_sha

723

else:

724

sha1 = osutils.sha_string(bytes)

725

if nostore_sha is not None:

726

if sha1 == nostore_sha:

727

raise errors.ExistingContent()

728

if key[-1] is None:

729

key = key[:-1] + ('sha1:' + sha1,)

730

731

start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)

732

return sha1, start, end, type

733

734

def _compress(self, key, bytes, max_delta_size, soft=False):

735

"""Compress lines with label key.

736

737

:param key: A key tuple. It is stored in the output for identification

738

of the text during decompression.

739

740

:param bytes: The bytes to be compressed

741

742

:param max_delta_size: The size above which we issue a fulltext instead

743

of a delta.

744

745

:param soft: Do a 'soft' compression. This means that we require larger

746

ranges to match to be considered for a copy command.

747

748

:return: The sha1 of lines, the start and end offsets in the delta, and

749

the type ('fulltext' or 'delta').

750

"""

751

raise NotImplementedError(self._compress)

752

753

def extract(self, key):

754

"""Extract a key previously added to the compressor.

755

756

:param key: The key to extract.

757

:return: An iterable over bytes and the sha1.

758

"""

759

(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]

760

delta_chunks = self.chunks[start_chunk:end_chunk]

761

stored_bytes = ''.join(delta_chunks)

762

if stored_bytes[0] == 'f':

763

fulltext_len, offset = decode_base128_int(stored_bytes[1:10])

764

data_len = fulltext_len + 1 + offset

765

if data_len != len(stored_bytes):

766

raise ValueError('Index claimed fulltext len, but stored bytes'

767

' claim %s != %s'

768

% (len(stored_bytes), data_len))

769

bytes = stored_bytes[offset + 1:]

770

else:

771

# XXX: This is inefficient at best

772

source = ''.join(self.chunks[:start_chunk])

773

if stored_bytes[0] != 'd':

774

raise ValueError('Unknown content kind, bytes claim %s'

775

% (stored_bytes[0],))

776

delta_len, offset = decode_base128_int(stored_bytes[1:10])

777

data_len = delta_len + 1 + offset

778

if data_len != len(stored_bytes):

779

raise ValueError('Index claimed delta len, but stored bytes'

780

' claim %s != %s'

781

% (len(stored_bytes), data_len))

782

bytes = apply_delta(source, stored_bytes[offset + 1:])

783

bytes_sha1 = osutils.sha_string(bytes)

784

return bytes, bytes_sha1

785

786

def flush(self):

787

"""Finish this group, creating a formatted stream.

788

789

After calling this, the compressor should no longer be used

790

"""

791

# TODO: this causes us to 'bloat' to 2x the size of content in the

792

# group. This has an impact for 'commit' of large objects.

793

# One possibility is to use self._content_chunks, and be lazy and

794

# only fill out self._content as a full string when we actually

795

# need it. That would at least drop the peak memory consumption

796

# for 'commit' down to ~1x the size of the largest file, at a

797

# cost of increased complexity within this code. 2x is still <<

798

# 3x the size of the largest file, so we are doing ok.

799

self._block.set_chunked_content(self.chunks, self.endpoint)

800

self.chunks = None

801

self._delta_index = None

802

return self._block

803

804

def pop_last(self):

805

"""Call this if you want to 'revoke' the last compression.

806

807

After this, the data structures will be rolled back, but you cannot do

808

more compression.

809

"""

810

self._delta_index = None

811

del self.chunks[self._last[0]:]

812

self.endpoint = self._last[1]

813

self._last = None

814

815

def ratio(self):

816

"""Return the overall compression ratio."""

817

return float(self.input_bytes) / float(self.endpoint)

818

819

820

class PythonGroupCompressor(_CommonGroupCompressor):

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        line_index = LinesDeltaIndex([])
        self._delta_index = line_index
        # The actual content is managed by LinesDeltaIndex, so self.chunks is
        # the very same list object as the index's lines.
        self.chunks = line_index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        input_len = len(bytes)
        new_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            new_lines, bytes_length=input_len, soft=soft)
        # Measured over every entry of out_lines, including the first two
        # slots that are overwritten below when the delta is kept.
        delta_length = sum([len(line) for line in out_lines])
        if delta_length > max_delta_size:
            # The delta is bigger than the caller allows, insert a fulltext
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(input_len)]
            out_lines += new_lines
            # The two header entries are not indexed, the text lines are.
            index_lines = [False, False] + [True] * len(new_lines)
        else:
            # this is a worthy delta, output it
            kind = 'delta'
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_length)
        # Snapshot the state before insertion, so pop_last() can roll back.
        start = self.endpoint
        chunk_start = len(self.chunks)
        self._last = (chunk_start, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        return start, self.endpoint, kind

863

864

865

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting
       minimal edits only).
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        """Create a compressor backed by a DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        The ``soft`` flag is accepted for interface compatibility but is not
        passed on to the delta index by this implementation.

        :raises AssertionError: If the delta index's source offset does not
            match self.endpoint, either before or after the insertion.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No acceptable delta was produced: store the text in full and
            # make it available as a source for later deltas.
            kind = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # One byte for the kind marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            kind = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if not self._delta_index._source_offset == self.endpoint:
            # The leading space on the second literal matters: without it
            # the message read "...out of syncwith the output lines".
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, kind

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion state in self._last, appends the chunks and
        advances self.endpoint by their combined length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

939

940

941

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Passed through to the graph index.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles that writes to 'newpack' on that
        transport. The open stream and container writer are attached to the
        result as ``stream`` and ``writer``.
    """
    def factory(transport):
        # One reference list holds the parents when a graph is stored.
        if graph:
            ref_length = 1
        else:
            ref_length = 0
        builder = BTreeBuilder(reference_lists=ref_length,
                               key_elements=keylength)
        pack_name = 'newpack'
        stream = transport.open_write_stream(pack_name)
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _GCGraphIndex(builder, lambda:True, parents=graph,
                              add_callback=builder.add_nodes,
                              inconsistency_fatal=inconsistency_fatal)
        access = knit._DirectPackAccess({})
        access.set_writer(writer, builder, (transport, pack_name))
        versioned_files = GroupCompressVersionedFiles(index, access, delta)
        # Kept on the object so that cleanup_pack_group can close them.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

971

972

973

def cleanup_pack_group(versioned_files):
    """Finish the container writer and close the stream of versioned_files.

    :param versioned_files: An object carrying ``writer`` and ``stream``
        attributes, such as the result of a make_pack_factory() factory.
    """
    writer = versioned_files.writer
    writer.end()
    stream = versioned_files.stream
    stream.close()

976

977

978

class GroupCompressVersionedFiles(VersionedFiles):

979

"""A group-compress based VersionedFiles implementation."""

980

981

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    self._index = index
    self._access = access
    self._delta = delta
    # key => parents for texts handed to the compressor but not yet
    # written to the index.
    self._unadded_refs = {}
    # Cache of blocks, limited to 50MB.
    cache_limit = 50*1024*1024
    self._group_cache = LRUSizeCache(max_size=cache_limit)
    self._fallback_vfs = []

994

995

def add_lines(self, key, parents, lines, parent_texts=None,
    left_matching_blocks=None, nostore_sha=None, random_id=False,
    check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \\n and contain no
        other \\n's. The last line may either contain no \\n's or a single
        terminating \\n. If the lines list does not meet this constraint the
        add routine may error or may succeed - but you will be unable to
        read the data back accurately. (Checking the lines have been split
        correctly is expensive and extremely unlikely to catch bugs so it
        is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. VERY IMPORTANT: the texts must be those
        returned by add_lines or data corruption can be caused.
        It is not used by this implementation.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent. The format is
        the SequenceMatcher.get_matching_blocks format.
        It is not used by this implementation.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and None in
        place of an opaque representation of the inserted version.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    total_length = sum([len(line) for line in lines])
    record = ChunkedContentFactory(key, parents, None, lines)
    inserted_sha1s = list(self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha))
    return inserted_sha1s[0], total_length, None

1044

1045

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :return: The text sha1, the length of text, and None.
    :raises BzrBadParameterUnicode: If text is not exactly a str.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    record = FulltextContentFactory(key, parents, None, text)
    inserted_sha1s = list(self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha))
    return inserted_sha1s[0], len(text), None

1062

1063

def add_fallback_versioned_files(self, a_versioned_files):
    """Add a source of texts for texts not present in this object.

    Fallbacks are consulted in the order they were added.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1069

1070

def annotate(self, key):
    """See VersionedFiles.annotate.

    :raises RevisionNotPresent: If key has no entry in the parent map.
    """
    graph = Graph(self)
    requested = self.get_parent_map([key])
    if not requested:
        raise errors.RevisionNotPresent(key, self)
    if requested[key] is None:
        # No graph information for this key: annotate it on its own.
        keys = [key]
        parent_map = {key:()}
    else:
        # Gather the whole ancestry, leaving out entries without parents
        # information.
        parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
                          if v is not None)
        keys = parent_map.keys()
    # We used Graph(self) to load the parent_map, but now that we have it,
    # we can just query the parent map directly, so create a KnownGraph
    heads_provider = _mod_graph.KnownGraph(parent_map)
    annotated = {}
    reannotate = annotate.reannotate
    # The result returned is that of the last record in the stream.
    last_key = key
    for record in self.get_record_stream(keys, 'topological', True):
        last_key = record.key
        lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parent_lines = [annotated[parent]
                        for parent in parent_map[last_key]]
        annotated[last_key] = list(
            reannotate(parent_lines, lines, last_key, None, heads_provider))
    return annotated[last_key]

1095

1096

def check(self, progress_bar=None):
    """See VersionedFiles.check().

    Every key is fetched and expanded to a fulltext; the texts themselves
    are discarded. progress_bar is not used.
    """
    all_keys = self.keys()
    stream = self.get_record_stream(all_keys, 'unordered', True)
    for record in stream:
        record.get_bytes_as('fulltext')

1101

1102

def _check_add(self, key, lines, random_id, check_content):

1103

"""check that version_id and lines are safe to add."""

1104

version_id = key[-1]

1105

if version_id is not None:

1106

if osutils.contains_whitespace(version_id):

1107

raise errors.InvalidRevisionId(version_id, self)

1108

self.check_not_reserved_id(version_id)

1109

# TODO: If random_id==False and the key is already present, we should

1110

# probably check that the existing content is identical to what is

1111

# being inserted, and otherwise raise an exception. This would make

1112

# the bundle code simpler.

1113

if check_content:

1114

self._check_lines_not_unicode(lines)

1115

self._check_lines_are_lines(lines)

1116

1117

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined mapping is wanted, not the per-source breakdown.
    with_sources = self._get_parent_map_with_sources(keys)
    return with_sources[0]

1125

1126

def _get_parent_map_with_sources(self, keys):

1127

"""Get a map of the parents of keys.

1128

1129

:param keys: The keys to look up parents for.

1130

:return: A tuple. The first element is a mapping from keys to parents.

1131

Absent keys are absent from the mapping. The second element is a

1132

list with the locations each key was found in. The first element

1133

is the in-this-knit parents, the second the first fallback source,

1134

and so on.

1135

"""

1136

result = {}

1137

sources = [self._index] + self._fallback_vfs

1138

source_results = []

1139

missing = set(keys)

1140

for source in sources:

1141

if not missing:

1142

break

1143

new_result = source.get_parent_map(missing)

1144

source_results.append(new_result)

1145

result.update(new_result)

1146

missing.difference_update(set(new_result))

1147

return result, source_results

1148

1149

def _get_block(self, index_memo):

1150

read_memo = index_memo[0:3]

1151

# get the group:

1152

try:

1153

block = self._group_cache[read_memo]

1154

except KeyError:

1155

# read the group

1156

zdata = self._access.get_raw_records([read_memo]).next()

1157

# decompress - whole thing - this is not a bug, as it

1158

# permits caching. We might want to store the partially

1159

# decompresed group and decompress object, so that recent

1160

# texts are not penalised by big groups.

1161

block = GroupCompressBlock.from_bytes(zdata)

1162

self._group_cache[read_memo] = block

1163

# cheapo debugging:

1164

# print len(zdata), len(plain)

1165

# parse - requires split_lines, better to have byte offsets

1166

# here (but not by much - we only split the region for the

1167

# recipe, and we often want to end up with lines anyway.

1168

return block

1169

1170

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    no_missing_parents = frozenset()
    return no_missing_parents

1179

1180

def get_record_stream(self, keys, ordering, include_delta_closure):

1181

"""Get a stream of records for keys.

1182

1183

:param keys: The keys to include.

1184

:param ordering: Either 'unordered' or 'topological'. A topologically

1185

sorted stream has compression parents strictly before their

1186

children.

1187

:param include_delta_closure: If True then the closure across any

1188

compression parents will be included (in the opaque data).

1189

:return: An iterator of ContentFactory objects, each of which is only

1190

valid until the iterator is advanced.

1191

"""

1192

# keys might be a generator

1193

orig_keys = list(keys)

1194

keys = set(keys)

1195

if not keys:

1196

return

1197

if (not self._index.has_graph

1198

and ordering in ('topological', 'groupcompress')):

1199

# Cannot topological order when no graph has been stored.

1200

# but we allow 'as-requested' or 'unordered'

1201

ordering = 'unordered'

1202

1203

remaining_keys = keys

1204

while True:

1205

try:

1206

keys = set(remaining_keys)

1207

for content_factory in self._get_remaining_record_stream(keys,

1208

orig_keys, ordering, include_delta_closure):

1209

remaining_keys.discard(content_factory.key)

1210

yield content_factory

1211

return

1212

except errors.RetryWithNewPacks, e:

1213

self._access.reload_or_raise(e)

1214

1215

def _find_from_fallback(self, missing):

1216

"""Find whatever keys you can from the fallbacks.

1217

1218

:param missing: A set of missing keys. This set will be mutated as keys

1219

are found from a fallback_vfs

1220

:return: (parent_map, key_to_source_map, source_results)

1221

parent_map the overall key => parent_keys

1222

key_to_source_map a dict from {key: source}

1223

source_results a list of (source: keys)

1224

"""

1225

parent_map = {}

1226

key_to_source_map = {}

1227

source_results = []

1228

for source in self._fallback_vfs:

1229

if not missing:

1230

break

1231

source_parents = source.get_parent_map(missing)

1232

parent_map.update(source_parents)

1233

source_parents = list(source_parents)

1234

source_results.append((source, source_parents))

1235

key_to_source_map.update((key, source) for key in source_parents)

1236

missing.difference_update(source_parents)

1237

return parent_map, key_to_source_map, source_results

1238

1239

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The returned objects should be in the order defined by 'ordering',
    which can weave between different sources.
    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: key => parent keys for every key to be ordered.
    :param key_to_source_map: key => fallback source; keys not listed are
        attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering == 'topological':
        ordered_keys = topo_sort(parent_map)
    else:
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        ordered_keys = sort_gc_optimal(parent_map)
    # Now group by source: a new (source, keys) entry is started every time
    # the source changes between consecutive keys.
    grouped = []
    current_source = None
    current_keys = None
    for key in ordered_keys:
        source = key_to_source_map.get(key, self)
        if source is not current_source:
            current_keys = []
            grouped.append((source, current_keys))
            current_source = source
        current_keys.append(key)
    return grouped

1267

1268

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1269

key_to_source_map):

1270

source_keys = []

1271

current_source = None

1272

for key in orig_keys:

1273

if key in locations or key in unadded_keys:

1274

source = self

1275

elif key in key_to_source_map:

1276

source = key_to_source_map[key]

1277

else: # absent

1278

continue

1279

if source is not current_source:

1280

source_keys.append((source, []))

1281

current_source = source

1282

source_keys[-1][1].append(key)

1283

return source_keys

1284

1285

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1286

source_result):

1287

def get_group(key):

1288

# This is the group the bytes are stored in, followed by the

1289

# location in the group

1290

return locations[key][0]

1291

present_keys = sorted(locations.iterkeys(), key=get_group)

1292

# We don't have an ordering for keys in the in-memory object, but

1293

# lets process the in-memory ones first.

1294

present_keys = list(unadded_keys) + present_keys

1295

# Now grab all of the ones from other sources

1296

source_keys = [(self, present_keys)]

1297

source_keys.extend(source_result)

1298

return source_keys

1299

1300

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include.
    :param orig_keys: The keys in the order they were requested; used for
        the 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    # Whatever the fallbacks can supply is removed from missing.
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    elif ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = {}
        for located_key, details in locations.iteritems():
            parent_map[located_key] = details[2]
        for unadded_key in unadded_keys:
            parent_map[unadded_key] = self._unadded_refs[unadded_key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    for key in missing:
        yield AbsentContentFactory(key)
    # manager batches up the keys requested from the block currently being
    # read; last_read_memo identifies that block.
    manager = None
    last_read_memo = None
    # TODO: This works fairly well at batching up existing groups into a
    #       streamable format, and possibly allowing for taking one big
    #       group and splitting it when it isn't fully utilized.
    #       However, it doesn't allow us to find under-utilized groups and
    #       combine them into a bigger group on the fly.
    #       (Consider the issue with how chk_map inserts texts
    #       one-at-a-time.) This could be done at insert_record_stream()
    #       time, but it probably would decrease the number of
    #       bytes-on-the-wire for fetch.
    for source, group_keys in source_keys:
        if source is not self:
            # Another source takes over: emit what has been batched so far
            # so that the overall order is kept.
            if manager is not None:
                for factory in manager.get_record_stream():
                    yield factory
                last_read_memo = manager = None
            for record in source.get_record_stream(group_keys, ordering,
                                                   include_delta_closure):
                yield record
            continue
        for key in group_keys:
            if key in self._unadded_refs:
                # Still only in the compressor: emit the pending batch,
                # then extract the text directly.
                if manager is not None:
                    for factory in manager.get_record_stream():
                        yield factory
                    last_read_memo = manager = None
                bytes, sha1 = self._compressor.extract(key)
                parents = self._unadded_refs[key]
                yield FulltextContentFactory(key, parents, sha1, bytes)
                continue
            index_memo, _, parents, (method, _) = locations[key]
            read_memo = index_memo[0:3]
            if last_read_memo != read_memo:
                # We are starting a new block. If we have a
                # manager, we have found everything that fits for
                # now, so yield records
                if manager is not None:
                    for factory in manager.get_record_stream():
                        yield factory
                # Now start a new manager
                block = self._get_block(index_memo)
                manager = _LazyGroupContentManager(block)
                last_read_memo = read_memo
            start, end = index_memo[3:5]
            manager.add_factory(key, parents, start, end)
    # Emit whatever is left in the final batch.
    if manager is not None:
        for factory in manager.get_record_stream():
            yield factory

1389

1390

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping each present key to the sha1 of its text.
        Records whose storage kind is 'absent' are left out.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            # The record carries no sha1, so compute it from the fulltext.
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1401

1402

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserted = self._insert_record_stream(stream, random_id=False)
    # The helper is lazy, so it has to be driven to completion; the sha1s
    # it yields are not needed here.
    for _unused_sha1 in inserted:
        pass

1415

1416

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Generator doing the real work of inserting a record stream.

    Unlike insert_record_stream, this yields the sha1 of each text it
    compresses, which lets add_lines stay minimal while still getting back
    the data it needs.  Because this is a generator, nothing is inserted
    unless it is iterated.

    Records whose storage kind is 'groupcompress-block' or
    'groupcompress-block-ref' are, when reuse_blocks is true, indexed
    against a raw copy of their block; no sha1 is yielded for them.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to the index's add_records.  When
        true, a key that shows up twice in the stream is noted and the
        second occurrence is skipped.
    :param nostore_sha: Passed to the compressor for each text.  If the
        sha1 of a given text matches nostore_sha, ExistingContent is
        raised rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records.
    :raises errors.RevisionNotPresent: If a record in the stream has
        storage kind 'absent'.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    adapters = {}

    def get_adapter(adapter_key):
        # Build each adapter at most once per call and reuse it afterwards.
        if adapter_key not in adapters:
            adapter_factory = adapter_registry.get(adapter_key)
            adapters[adapter_key] = adapter_factory(self)
        return adapters[adapter_key]

    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    keys_to_add = []

    def flush():
        # Write the current group out as one raw record, index every key
        # buffered for it, then start over with an empty compressor.
        group_bytes = self._compressor.flush().to_bytes()
        _, start, length = self._access.add_raw_records(
            [(None, len(group_bytes))], group_bytes)[0]
        nodes = [(key, "%d %d %s" % (start, length, reads), refs)
                 for key, reads, refs in keys_to_add]
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    for record in stream:
        storage_kind = record.storage_kind
        # Raise an error when a record is missing.
        if storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            if storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo and remember
                # where it landed for the '-ref' records that follow.
                insert_manager = record._manager
                insert_manager._check_rebuild_block()
                block_bytes = insert_manager._block.to_bytes()
                _, block_start, block_length = self._access.add_raw_records(
                    [(None, len(block_bytes))], block_bytes)[0]
                del block_bytes
            if storage_kind in ('groupcompress-block',
                                'groupcompress-block-ref'):
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(
                    [(record.key, value, (record.parents,))],
                    random_id=random_id)
                continue
        try:
            text = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter = get_adapter((record.storage_kind, 'fulltext'))
            text = adapter.get_bytes(record)
        text_len = len(text)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < text_len:
            max_fulltext_len = text_len
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         _kind) = self._compressor.compress(record.key, text, record.sha1,
                                            soft=soft,
                                            nostore_sha=nostore_sha)
        # delta_ratio = float(len(text)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
                and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        else:
            # Otherwise split at 4MiB outright, or at 2MiB when the prefix
            # has just changed.
            start_new_block = (
                end_point > 4*1024*1024
                or (prefix is not None and prefix != last_prefix
                    and end_point > 2*1024*1024))
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the full group, write that group,
            # and make the text the first entry of a fresh one.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = text_len
            (found_sha1, start_point, end_point,
             _kind) = self._compressor.compress(record.key, text,
                                                record.sha1)
        if record.key[-1] is None:
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
                            (record.parents,)))
    if keys_to_add:
        flush()
    self._compressor = None

1554

1555

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Yield each line of the fulltext of every requested key.

    Records are fetched with get_record_stream in 'unordered' order, and
    each record's fulltext is split with osutils.split_lines.  Every
    yielded item pairs a line with the key of the record it was read from
    (the line is present in that text, not necessarily introduced by it).

    If a progress bar is supplied it is updated once per record and once
    more after the last record.  The caller is responsible for cleaning up
    the progress bar (because this is an iterator).

    :param keys: The keys whose texts should be walked; duplicates are
        collapsed before the stream is requested.
    :param pb: Optional progress bar.
    :return: An iterator over (line, key).
    :raises errors.RevisionNotPresent: If a record comes back with storage
        kind 'absent'.
    """
    wanted = set(keys)
    total = len(wanted)
    records = self.get_record_stream(wanted, 'unordered', True)
    # XXX: todo - optimise to use less than full texts.
    for position, record in enumerate(records):
        key = record.key
        if pb is not None:
            pb.update('Walking content', position, total)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(key, self)
        for line in osutils.split_lines(record.get_bytes_as('fulltext')):
            yield line, key
    if pb is not None:
        pb.update('Walking content', total, total)

1594

1595

def keys(self):
    """See VersionedFiles.keys.

    Returns the union, as a set, of the keys reported by this object's
    index and by each of its fallback versioned files.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    collected = set()
    for source in [self._index] + self._fallback_vfs:
        collected.update(source.keys())
    return collected

1604

1605

1606

class _GCGraphIndex(object):

1607

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1608

1609

def __init__(self, graph_index, is_locked, parents=True,

1610

add_callback=None, track_external_parent_refs=False,

1611

inconsistency_fatal=True):

1612

"""Construct a _GCGraphIndex on a graph_index.

1613

1614

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1615

:param is_locked: A callback, returns True if the index is locked and

1616

thus usable.

1617

:param parents: If True, record knits parents, if not do not record

1618

parents.

1619

:param add_callback: If not None, allow additions to the index and call

1620

this callback with a list of added GraphIndex nodes:

1621

[(node, value, node_refs), ...]

1622

:param track_external_parent_refs: As keys are added, keep track of the

1623

keys they reference, so that we can query get_missing_parents(),

1624

etc.

1625

:param inconsistency_fatal: When asked to add records that are already

1626

present, and the details are inconsistent with the existing

1627

record, raise an exception instead of warning (and skipping the

1628

record).

1629

"""

1630

self._add_callback = add_callback

1631

self._graph_index = graph_index

1632

self._parents = parents

1633

self.has_graph = parents

1634

self._is_locked = is_locked

1635

self._inconsistency_fatal = inconsistency_fatal

1636

if track_external_parent_refs:

1637

self._key_dependencies = knit._KeyRefs()

1638

else:

1639

self._key_dependencies = None

1640

1641

def add_records(self, records, random_id=False):

1642

"""Add multiple records to the index.

1643

1644

This function does not insert data into the Immutable GraphIndex

1645

backing the KnitGraphIndex, instead it prepares data for insertion by

1646

the caller and checks that it is safe to insert then calls

1647

self._add_callback with the prepared GraphIndex nodes.

1648

1649

:param records: a list of tuples:

1650

(key, options, access_memo, parents).

1651

:param random_id: If True the ids being added were randomly generated

1652

and no check for existence will be performed.

1653

"""

1654

if not self._add_callback:

1655

raise errors.ReadOnlyError(self)

1656

# we hope there are no repositories with inconsistent parentage

1657

# anymore.

1658

1659

changed = False

1660

keys = {}

1661

for (key, value, refs) in records:

1662

if not self._parents:

1663

if refs:

1664

for ref in refs:

1665

if ref:

1666

raise errors.KnitCorrupt(self,

1667

"attempt to add node with parents "

1668

"in parentless index.")

1669

refs = ()

1670

changed = True

1671

keys[key] = (value, refs)

1672

# check for dups

1673

if not random_id:

1674

present_nodes = self._get_entries(keys)

1675

for (index, key, value, node_refs) in present_nodes:

1676

if node_refs != keys[key][1]:

1677

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1678

if self._inconsistency_fatal:

1679

raise errors.KnitCorrupt(self, "inconsistent details"

1680

" in add_records: %s" %

1681

details)

1682

else:

1683

trace.warning("inconsistent details in skipped"

1684

" record: %s", details)

1685

del keys[key]

1686

changed = True

1687

if changed:

1688

result = []

1689

if self._parents:

1690

for key, (value, node_refs) in keys.iteritems():

1691

result.append((key, value, node_refs))

1692

else:

1693

for key, (value, node_refs) in keys.iteritems():

1694

result.append((key, value))

1695

records = result

1696

key_dependencies = self._key_dependencies

1697

if key_dependencies is not None and self._parents:

1698

for key, value, refs in records:

1699

parents = refs[0]

1700

key_dependencies.add_references(key, parents)

1701

self._add_callback(records)

1702

1703

def _check_read(self):

1704

"""Raise an exception if reads are not permitted."""

1705

if not self._is_locked():

1706

raise errors.ObjectNotLocked(self)

1707

1708

def _check_write_ok(self):

1709

"""Raise an exception if writes are not permitted."""

1710

if not self._is_locked():

1711

raise errors.ObjectNotLocked(self)

1712

1713

def _get_entries(self, keys, check_present=False):

1714

"""Get the entries for keys.

1715

1716

Note: Callers are responsible for checking that the index is locked

1717

before calling this method.

1718

1719

:param keys: An iterable of index key tuples.

1720

"""

1721

keys = set(keys)

1722

found_keys = set()

1723

if self._parents:

1724

for node in self._graph_index.iter_entries(keys):

1725

yield node

1726

found_keys.add(node[1])

1727

else:

1728

# adapt parentless index to the rest of the code.

1729

for node in self._graph_index.iter_entries(keys):

1730

yield node[0], node[1], node[2], ()

1731

found_keys.add(node[1])

1732

if check_present:

1733

missing_keys = keys.difference(found_keys)

1734

if missing_keys:

1735

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1736

1737

def get_parent_map(self, keys):

1738

"""Get a map of the parents of keys.

1739

1740

:param keys: The keys to look up parents for.

1741

:return: A mapping from keys to parents. Absent keys are absent from

1742

the mapping.

1743

"""

1744

self._check_read()

1745

nodes = self._get_entries(keys)

1746

result = {}

1747

if self._parents:

1748

for node in nodes:

1749

result[node[1]] = node[3][0]

1750

else:

1751

for node in nodes:

1752

result[node[1]] = None

1753

return result

1754

1755

def get_missing_parents(self):

1756

"""Return the keys of missing parents."""

1757

# Copied from _KnitGraphIndex.get_missing_parents

1758

# We may have false positives, so filter those out.

1759

self._key_dependencies.add_keys(

1760

self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))

1761

return frozenset(self._key_dependencies.get_unsatisfied_refs())

1762

1763

def get_build_details(self, keys):

1764

"""Get the various build details for keys.

1765

1766

Ghosts are omitted from the result.

1767

1768

:param keys: An iterable of keys.

1769

:return: A dict of key:

1770

(index_memo, compression_parent, parents, record_details).

1771

index_memo

1772

opaque structure to pass to read_records to extract the raw

1773

data

1774

compression_parent

1775

Content that this record is built upon, may be None

1776

parents

1777

Logical parents of this node

1778

record_details

1779

extra information about the content which needs to be passed to

1780

Factory.parse_record

1781

"""

1782

self._check_read()

1783

result = {}

1784

entries = self._get_entries(keys)

1785

for entry in entries:

1786

key = entry[1]

1787

if not self._parents:

1788

parents = None

1789

else:

1790

parents = entry[3][0]

1791

method = 'group'

1792

result[key] = (self._node_to_position(entry),

1793

None, parents, (method, None))

1794

return result

1795

1796

def keys(self):

1797

"""Get all the keys in the collection.

1798

1799

The keys are not ordered.

1800

"""

1801

self._check_read()

1802

return [node[1] for node in self._graph_index.iter_all_entries()]

1803

1804

def _node_to_position(self, node):

1805

"""Convert an index value to position details."""

1806

bits = node[2].split(' ')

1807

# It would be nice not to read the entire gzip.

1808

start = int(bits[0])

1809

stop = int(bits[1])

1810

basis_end = int(bits[2])

1811

delta_end = int(bits[3])

1812

return node[0], start, stop, basis_end, delta_end

1813

1814

def scan_unvalidated_index(self, graph_index):

1815

"""Inform this _GCGraphIndex that there is an unvalidated index.

1816

1817

This allows this _GCGraphIndex to keep track of any missing

1818

compression parents we may want to have filled in to make those

1819

indices valid.

1820

1821

:param graph_index: A GraphIndex

1822

"""

1823

if self._key_dependencies is not None:

1824

# Add parent refs from graph_index (and discard parent refs that

1825

# the graph_index has).

1826

add_refs = self._key_dependencies.add_references

1827

for node in graph_index.iter_all_entries():

1828

add_refs(node[1], node[3][0])

1829

1830

1831

1832

from bzrlib._groupcompress_py import (

1833

apply_delta,

1834

apply_delta_to_source,

1835

encode_base128_int,

1836

decode_base128_int,

1837

decode_copy_instruction,

1838

LinesDeltaIndex,

1839

)

1840

try:

1841

from bzrlib._groupcompress_pyx import (

1842

apply_delta,

1843

apply_delta_to_source,

1844

DeltaIndex,

1845

encode_base128_int,

1846

decode_base128_int,

1847

)

1848

GroupCompressor = PyrexGroupCompressor

1849

except ImportError:

1850

GroupCompressor = PythonGroupCompressor

1851

Older »