~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Canonical.com Patch Queue Manager
Date: 2008-03-16 14:01:20 UTC
mfrom: (3280.2.5 integration)
Revision ID: pqm@pqm.ubuntu.com-20080316140120-i3yq8yr1l66m11h7

Start 1.4 development

files added:
bzrlib/help_topics/en/hooks.txt

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_http_implementations.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/xml6.py

tools/win32/survey.txt

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/btree_index.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/inventory_delta.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/tree_implementations/test_get_file_with_stat.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/views.py

bzrlib/xml5.py

bzrlib/xml6.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers/btree_index_prefetch.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/cycle.txt

doc/developers/ec2.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/integration.txt

doc/developers/lca_tree_merging.txt

doc/developers/overview.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/web_browsing.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/news-template.txt

tools/check-newsbugs.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/win32/build_release.py

tools/win32/run_script.py

files renamed:
bzrlib/tests/ftp_server/medusa_based.py => bzrlib/tests/ftp_server.py

bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

bzrlib/xml8.py => bzrlib/xml5.py

doc/en/developer-guide/HACKING.txt => doc/developers/HACKING.txt

doc/en/user-guide/part2_intro.txt => doc/en/user-guide/best_practice_intro.txt

doc/en/user-guide/zen.txt => doc/en/user-guide/revnos.txt

files modified:
.bzrignore

Makefile

NEWS

bzr.ico

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/email_message.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib/bzr_access

contrib/newinventory.py

contrib/pwclient.full

doc/default.css

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/container-format.txt

doc/developers/development-repo.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/lca-merge.txt

doc/developers/merge-scaling.txt

doc/developers/network-protocol.txt

doc/developers/performance-contributing.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/repository.txt

doc/developers/revision-properties.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/writing_a_plugin.txt

doc/index.txt

generate_docs.py

profile_imports.py

setup.py

tools/bzr_epydoc_uid.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/package_mf.py

tools/rst2html.py

tools/weavebench.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

from itertools import izip

from cStringIO import StringIO

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

diff,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

patiencediff,

trace,

)

from bzrlib.graph import Graph

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

_USE_LZMA = False and (pylzma is not None)

# osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Return the keys of parent_map in groupcompress order.

    Keys are bucketed by prefix (the first element of a multi-element key;
    plain strings and 1-tuples all share the '' bucket).  Buckets are
    visited in sorted prefix order, and within a bucket the keys come out
    in the reverse of the order produced by topo_sort.

    :param parent_map: A mapping of key => parent keys.
    :return: A sorted-list of keys
    """
    grouped = {}
    for entry in parent_map.iteritems():
        key = entry[0]
        prefix = ''
        if not isinstance(key, str) and len(key) != 1:
            prefix = key[0]
        # Keep the whole (key, parents) item, topo_sort needs the parents.
        grouped.setdefault(prefix, []).append(entry)

    ordered_keys = []
    for prefix in sorted(grouped):
        topo_ordered = topo_sort(grouped[prefix])
        ordered_keys.extend(reversed(topo_ordered))
    return ordered_keys

# The max zlib window size is 32kB, so if we set 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should guarantee

# num_bytes coming out.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The serialised form written by to_bytes() and read by from_bytes() is:
    a 6 byte header naming the compressor, the compressed length and the
    uncompressed length as base-10 integers each terminated by a newline,
    and then the compressed content.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        # map by key? or just order in file?
        # 'zlib' or 'lzma' once known, describes how _z_content is encoded.
        self._compressor_name = None
        self._z_content = None
        # Only set while a partial zlib decompression is in progress.
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None

    def __len__(self):
        """Return the compressed plus the uncompressed content length."""
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: If more bytes are requested than the block
            claims to hold, if there is nothing to decompress, if the
            compressor is unknown, or if the stream ends before num_bytes
            have been produced.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The complete serialised block.
        :param pos: Offset in bytes of the first length field.
        :return: The position in bytes just after the last newline
        :raises AssertionError: If the amount of data after the lengths
            does not match the recorded compressed length.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string as produced by to_bytes().
        :return: A new block holding the still-compressed content.
        :raises ValueError: If bytes does not start with a known header.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content
        :param start: Offset of the record's first byte in the uncompressed
            content.
        :param end: Offset just past the record's last byte.
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: If the record does not start with 'f' or 'd', or
            if its recorded length disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c not in ('f', 'd'):
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                             ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            return self._content[content_start:end]
        # A delta is expanded against the content of this block.
        return apply_delta_to_source(self._content, content_start, end)

    def set_content(self, content):
        """Set the content of this block."""
        self._content_length = len(content)
        self._content = content
        # Any previously compressed form no longer matches.
        self._z_content = None

    def to_bytes(self):
        """Encode the information into a byte stream.

        The content is compressed first if no compressed form is held yet.
        The header always names the compressor that actually produced the
        compressed content, so a block parsed by from_bytes() is written
        back with the header it was read with.

        :raises AssertionError: If there is neither compressed nor
            uncompressed content.
        """
        if self._z_content is None:
            if self._content is None:
                raise AssertionError('Nothing to compress')
            if _USE_LZMA:
                self._z_content = pylzma.compress(self._content)
                self._compressor_name = 'lzma'
            else:
                self._z_content = zlib.compress(self._content)
                self._compressor_name = 'zlib'
            self._z_content_length = len(self._z_content)
        if self._compressor_name == 'lzma':
            header = self.GCB_LZ_HEADER
        elif self._compressor_name == 'zlib':
            header = self.GCB_HEADER
        elif _USE_LZMA:
            # Compressed content of unknown origin: fall back to the module
            # default.
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, chose whether you
            want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: If a record has an unknown kind or a bad length,
            or if a delta does not add up to its claimed sizes.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                                     ' extraction resulted in %d bytes'
                                     % (decomp_len, measured_len))
            pos += content_len
        return result

class _LazyGroupCompressFactory(object):

364

"""Yield content from a GroupCompressBlock on demand."""

365

366

def __init__(self, key, parents, manager, start, end, first):

367

"""Create a _LazyGroupCompressFactory

368

369

:param key: The key of just this record

370

:param parents: The parents of this key (possibly None)

371

:param gc_block: A GroupCompressBlock object

372

:param start: Offset of the first byte for this record in the

373

uncompressd content

374

:param end: Offset of the byte just after the end of this record

375

(ie, bytes = content[start:end])

376

:param first: Is this the first Factory for the given block?

377

"""

378

self.key = key

379

self.parents = parents

380

self.sha1 = None

381

# Note: This attribute coupled with Manager._factories creates a

382

# reference cycle. Perhaps we would rather use a weakref(), or

383

# find an appropriate time to release the ref. After the first

384

# get_bytes_as call? After Manager.get_record_stream() returns

385

# the object?

386

self._manager = manager

387

self._bytes = None

388

self.storage_kind = 'groupcompress-block'

389

if not first:

390

self.storage_kind = 'groupcompress-block-ref'

391

self._first = first

392

self._start = start

393

self._end = end

394

395

def __repr__(self):

396

return '%s(%s, first=%s)' % (self.__class__.__name__,

397

self.key, self._first)

398

399

def get_bytes_as(self, storage_kind):

400

if storage_kind == self.storage_kind:

401

if self._first:

402

# wire bytes, something...

403

return self._manager._wire_bytes()

404

else:

405

return ''

406

if storage_kind in ('fulltext', 'chunked'):

407

if self._bytes is None:

408

# Grab and cache the raw bytes for this entry

409

# and break the ref-cycle with _manager since we don't need it

410

# anymore

411

self._manager._prepare_for_extract()

412

block = self._manager._block

413

self._bytes = block.extract(self.key, self._start, self._end)

414

# There are code paths that first extract as fulltext, and then

415

# extract as storage_kind (smart fetch). So we don't break the

416

# refcycle here, but instead in manager.get_record_stream()

417

# self._manager = None

418

if storage_kind == 'fulltext':

419

return self._bytes

420

else:

421

return [self._bytes]

422

raise errors.UnavailableRepresentation(self.key, storage_kind,

423

self.storage_kind)

424

425

426

class _LazyGroupContentManager(object):

427

"""This manages a group of _LazyGroupCompressFactory objects."""

428

429

def __init__(self, block):

430

self._block = block

431

# We need to preserve the ordering

432

self._factories = []

433

self._last_byte = 0

434

435

def add_factory(self, key, parents, start, end):

436

if not self._factories:

437

first = True

438

else:

439

first = False

440

# Note that this creates a reference cycle....

441

factory = _LazyGroupCompressFactory(key, parents, self,

442

start, end, first=first)

443

# max() works here, but as a function call, doing a compare seems to be

444

# significantly faster, timeit says 250ms for max() and 100ms for the

445

# comparison

446

if end > self._last_byte:

447

self._last_byte = end

448

self._factories.append(factory)

449

450

def get_record_stream(self):

451

"""Get a record for all keys added so far."""

452

for factory in self._factories:

453

yield factory

454

# Break the ref-cycle

455

factory._bytes = None

456

factory._manager = None

457

# TODO: Consider setting self._factories = None after the above loop,

458

# as it will break the reference cycle

459

460

def _trim_block(self, last_byte):

461

"""Create a new GroupCompressBlock, with just some of the content."""

462

# None of the factories need to be adjusted, because the content is

463

# located in an identical place. Just that some of the unreferenced

464

# trailing bytes are stripped

465

trace.mutter('stripping trailing bytes from groupcompress block'

466

' %d => %d', self._block._content_length, last_byte)

467

new_block = GroupCompressBlock()

468

self._block._ensure_content(last_byte)

469

new_block.set_content(self._block._content[:last_byte])

470

self._block = new_block

471

472

def _rebuild_block(self):

473

"""Create a new GroupCompressBlock with only the referenced texts."""

474

compressor = GroupCompressor()

475

tstart = time.time()

476

old_length = self._block._content_length

477

end_point = 0

478

for factory in self._factories:

479

bytes = factory.get_bytes_as('fulltext')

480

(found_sha1, start_point, end_point,

481

type) = compressor.compress(factory.key, bytes, factory.sha1)

482

# Now update this factory with the new offsets, etc

483

factory.sha1 = found_sha1

484

factory._start = start_point

485

factory._end = end_point

486

self._last_byte = end_point

487

new_block = compressor.flush()

488

# TODO: Should we check that new_block really *is* smaller than the old

489

# block? It seems hard to come up with a method that it would

490

# expand, since we do full compression again. Perhaps based on a

491

# request that ends up poorly ordered?

492

delta = time.time() - tstart

493

self._block = new_block

494

trace.mutter('creating new compressed block on-the-fly in %.3fs'

495

' %d bytes => %d bytes', delta, old_length,

496

self._block._content_length)

497

498

def _prepare_for_extract(self):

499

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

500

# We expect that if one child is going to fulltext, all will be. This

501

# helps prevent all of them from extracting a small amount at a time.

502

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

503

# time (self._block._content) is a little expensive.

504

self._block._ensure_content(self._last_byte)

505

506

def _check_rebuild_block(self):

507

"""Check to see if our block should be repacked."""

508

total_bytes_used = 0

509

last_byte_used = 0

510

for factory in self._factories:

511

total_bytes_used += factory._end - factory._start

512

last_byte_used = max(last_byte_used, factory._end)

513

# If we are using most of the bytes from the block, we have nothing

514

# else to check (currently more that 1/2)

515

if total_bytes_used * 2 >= self._block._content_length:

516

return

517

# Can we just strip off the trailing bytes? If we are going to be

518

# transmitting more than 50% of the front of the content, go ahead

519

if total_bytes_used * 2 > last_byte_used:

520

self._trim_block(last_byte_used)

521

return

522

523

# We are using a small amount of the data, and it isn't just packed

524

# nicely at the front, so rebuild the content.

525

# Note: This would be *nicer* as a strip-data-from-group, rather than

526

# building it up again from scratch

527

# It might be reasonable to consider the fulltext sizes for

528

# different bits when deciding this, too. As you may have a small

529

# fulltext, and a trivial delta, and you are just trading around

530

# for another fulltext. If we do a simple 'prune' you may end up

531

# expanding many deltas into fulltexts, as well.

532

# If we build a cheap enough 'strip', then we could try a strip,

533

# if that expands the content, we then rebuild.

534

self._rebuild_block()

535

536

def _wire_bytes(self):

537

"""Return a byte stream suitable for transmitting over the wire."""

538

self._check_rebuild_block()

539

# The outer block starts with:

540

# 'groupcompress-block\n'

541

# <length of compressed key info>\n

542

# <length of uncompressed info>\n

543

# <length of gc block>\n

544

# <header bytes>

545

# <gc-block>

546

lines = ['groupcompress-block\n']

547

# The minimal info we need is the key, the start offset, and the

548

# parents. The length and type are encoded in the record itself.

549

# However, passing in the other bits makes it easier. The list of

550

# keys, and the start offset, the length

551

# 1 line key

552

# 1 line with parents, '' for ()

553

# 1 line for start offset

554

# 1 line for end byte

555

header_lines = []

556

for factory in self._factories:

557

key_bytes = '\x00'.join(factory.key)

558

parents = factory.parents

559

if parents is None:

560

parent_bytes = 'None:'

561

else:

562

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

563

record_header = '%s\n%s\n%d\n%d\n' % (

564

key_bytes, parent_bytes, factory._start, factory._end)

565

header_lines.append(record_header)

566

# TODO: Can we break the refcycle at this point and set

567

# factory._manager = None?

568

header_bytes = ''.join(header_lines)

569

del header_lines

570

header_bytes_len = len(header_bytes)

571

z_header_bytes = zlib.compress(header_bytes)

572

del header_bytes

573

z_header_bytes_len = len(z_header_bytes)

574

block_bytes = self._block.to_bytes()

575

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

576

len(block_bytes)))

577

lines.append(z_header_bytes)

578

lines.append(block_bytes)

579

del z_header_bytes, block_bytes

580

return ''.join(lines)

581

582

@classmethod

583

def from_bytes(cls, bytes):

584

# TODO: This does extra string copying, probably better to do it a

585

# different way

586

(storage_kind, z_header_len, header_len,

587

block_len, rest) = bytes.split('\n', 4)

588

del bytes

589

if storage_kind != 'groupcompress-block':

590

raise ValueError('Unknown storage kind: %s' % (storage_kind,))

591

z_header_len = int(z_header_len)

592

if len(rest) < z_header_len:

593

raise ValueError('Compressed header len shorter than all bytes')

594

z_header = rest[:z_header_len]

595

header_len = int(header_len)

596

header = zlib.decompress(z_header)

597

if len(header) != header_len:

598

raise ValueError('invalid length for decompressed bytes')

599

del z_header

600

block_len = int(block_len)

601

if len(rest) != z_header_len + block_len:

602

raise ValueError('Invalid length for block')

603

block_bytes = rest[z_header_len:]

604

del rest

605

# So now we have a valid GCB, we just need to parse the factories that

606

# were sent to us

607

header_lines = header.split('\n')

608

del header

609

last = header_lines.pop()

610

if last != '':

611

raise ValueError('header lines did not end with a trailing'

612

' newline')

613

if len(header_lines) % 4 != 0:

614

raise ValueError('The header was not an even multiple of 4 lines')

615

block = GroupCompressBlock.from_bytes(block_bytes)

616

del block_bytes

617

result = cls(block)

618

for start in xrange(0, len(header_lines), 4):

619

# intern()?

620

key = tuple(header_lines[start].split('\x00'))

621

parents_line = header_lines[start+1]

622

if parents_line == 'None:':

623

parents = None

624

else:

625

parents = tuple([tuple(segment.split('\x00'))

626

for segment in parents_line.split('\t')

627

if segment])

628

start_offset = int(header_lines[start+2])

629

end_offset = int(header_lines[start+3])

630

result.add_factory(key, parents, start_offset, end_offset)

631

return result

632

633

634

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn the wire form of a groupcompress block into a record stream.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The wire bytes, as parsed by
        _LazyGroupContentManager.from_bytes().
    :param line_end: Not used by this function.
    :return: An iterator over the records held by the block.
    :raises ValueError: If storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

class _CommonGroupCompressor(object):

642

643

def __init__(self):

644

"""Create a GroupCompressor."""

645

self.chunks = []

646

self._last = None

647

self.endpoint = 0

648

self.input_bytes = 0

649

self.labels_deltas = {}

650

self._delta_index = None # Set by the children

651

self._block = GroupCompressBlock()

652

653

def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):

654

"""Compress lines with label key.

655

656

:param key: A key tuple. It is stored in the output

657

for identification of the text during decompression. If the last

658

element is 'None' it is replaced with the sha1 of the text -

659

e.g. sha1:xxxxxxx.

660

:param bytes: The bytes to be compressed

661

:param expected_sha: If non-None, the sha the lines are believed to

662

have. During compression the sha is calculated; a mismatch will

663

cause an error.

664

:param nostore_sha: If the computed sha1 sum matches, we will raise

665

ExistingContent rather than adding the text.

666

:param soft: Do a 'soft' compression. This means that we require larger

667

ranges to match to be considered for a copy command.

668

669

:return: The sha1 of lines, the start and end offsets in the delta, and

670

the type ('fulltext' or 'delta').

671

672

:seealso VersionedFiles.add_lines:

673

"""

674

if not bytes: # empty, like a dir entry, etc

675

if nostore_sha == _null_sha1:

676

raise errors.ExistingContent()

677

return _null_sha1, 0, 0, 'fulltext'

678

# we assume someone knew what they were doing when they passed it in

679

if expected_sha is not None:

680

sha1 = expected_sha

681

else:

682

sha1 = osutils.sha_string(bytes)

683

if nostore_sha is not None:

684

if sha1 == nostore_sha:

685

raise errors.ExistingContent()

686

if key[-1] is None:

687

key = key[:-1] + ('sha1:' + sha1,)

688

689

start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)

690

return sha1, start, end, type

691

692

def _compress(self, key, bytes, max_delta_size, soft=False):

693

"""Compress lines with label key.

694

695

:param key: A key tuple. It is stored in the output for identification

696

of the text during decompression.

697

698

:param bytes: The bytes to be compressed

699

700

:param max_delta_size: The size above which we issue a fulltext instead

701

of a delta.

702

703

:param soft: Do a 'soft' compression. This means that we require larger

704

ranges to match to be considered for a copy command.

705

706

:return: The sha1 of lines, the start and end offsets in the delta, and

707

the type ('fulltext' or 'delta').

708

"""

709

raise NotImplementedError(self._compress)

710

711

def extract(self, key):

712

"""Extract a key previously added to the compressor.

713

714

:param key: The key to extract.

715

:return: An iterable over bytes and the sha1.

716

"""

717

(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]

718

delta_chunks = self.chunks[start_chunk:end_chunk]

719

stored_bytes = ''.join(delta_chunks)

720

if stored_bytes[0] == 'f':

721

fulltext_len, offset = decode_base128_int(stored_bytes[1:10])

722

data_len = fulltext_len + 1 + offset

723

if data_len != len(stored_bytes):

724

raise ValueError('Index claimed fulltext len, but stored bytes'

725

' claim %s != %s'

726

% (len(stored_bytes), data_len))

727

bytes = stored_bytes[offset + 1:]

728

else:

729

# XXX: This is inefficient at best

730

source = ''.join(self.chunks[:start_chunk])

731

if stored_bytes[0] != 'd':

732

raise ValueError('Unknown content kind, bytes claim %s'

733

% (stored_bytes[0],))

734

delta_len, offset = decode_base128_int(stored_bytes[1:10])

735

data_len = delta_len + 1 + offset

736

if data_len != len(stored_bytes):

737

raise ValueError('Index claimed delta len, but stored bytes'

738

' claim %s != %s'

739

% (len(stored_bytes), data_len))

740

bytes = apply_delta(source, stored_bytes[offset + 1:])

741

bytes_sha1 = osutils.sha_string(bytes)

742

return bytes, bytes_sha1

743

744

def flush(self):

745

"""Finish this group, creating a formatted stream.

746

747

After calling this, the compressor should no longer be used

748

"""

749

content = ''.join(self.chunks)

750

self.chunks = None

751

self._delta_index = None

752

self._block.set_content(content)

753

return self._block

754

755

def pop_last(self):

756

"""Call this if you want to 'revoke' the last compression.

757

758

After this, the data structures will be rolled back, but you cannot do

759

more compression.

760

"""

761

self._delta_index = None

762

del self.chunks[self._last[0]:]

763

self.endpoint = self._last[1]

764

self._last = None

765

766

def ratio(self):

767

"""Return the overall compression ratio."""

768

return float(self.input_bytes) / float(self.endpoint)

769

770

771

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor working on lines, backed by a LinesDeltaIndex."""

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        index = LinesDeltaIndex([])
        self._delta_index = index
        # The actual content is managed by LinesDeltaIndex, self.chunks is
        # the very same list object.
        self.chunks = index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        text_len = len(bytes)
        text_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            text_lines, bytes_length=text_len, soft=soft)
        delta_len = sum([len(line) for line in out_lines])
        if delta_len <= max_delta_size:
            # this is a worthy delta, output it
            type = 'delta'
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_len)
        else:
            # The delta is over the size limit, insert a fulltext. Only the
            # lines of the text itself (not the two header entries) are
            # flagged in index_lines.
            type = 'fulltext'
            out_lines = ['f', encode_base128_int(text_len)]
            out_lines.extend(text_lines)
            index_lines = [False, False] + [True] * len(text_lines)
        # Remember where we were before the insertion
        start = self.endpoint
        first_chunk = len(self.chunks)
        self._last = (first_chunk, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += text_len
        self.labels_deltas[key] = (start, first_chunk,
                                   self.endpoint, len(self.chunks))
        return start, self.endpoint, type

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting
       minimal edits only.
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        :raises AssertionError: If the delta index's _source_offset and
            self.endpoint disagree before or after the record is added.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No delta was produced, so store the text itself.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # The kind byte plus the encoded length precede the payload.
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # Adjacent string literals are joined without a separator, so the
            # space before 'with' has to be spelled out.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, type

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the position before the insertion in self._last and advances
        self.endpoint by the total length of the chunks.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

def make_pack_factory(graph, delta, keylength):
    """Build a callable that creates pack based groupcompress storage.

    The result is only functional enough to run interface tests; it does not
    try to provide a full pack environment.

    :param graph: Whether graph (parent) data is stored.
    :param delta: Handed to GroupCompressVersionedFiles as its delta flag.
    :param keylength: How many elements each key has.
    :return: A function taking a transport and returning a
        GroupCompressVersionedFiles that writes to 'newpack' on it. The
        returned object carries the open stream and container writer as its
        ``stream`` and ``writer`` attributes.
    """
    def factory(transport):
        # One reference list (the parents) when a graph is stored, else none.
        if graph:
            reference_list_count = 1
        else:
            reference_list_count = 0
        builder = BTreeBuilder(reference_lists=reference_list_count,
            key_elements=keylength)
        pack_stream = transport.open_write_stream('newpack')
        pack_writer = pack.ContainerWriter(pack_stream.write)
        pack_writer.begin()
        gc_index = _GCGraphIndex(builder, lambda:True, parents=graph,
            add_callback=builder.add_nodes)
        pack_access = knit._DirectPackAccess({})
        pack_access.set_writer(pack_writer, builder, (transport, 'newpack'))
        versioned_files = GroupCompressVersionedFiles(gc_index, pack_access,
            delta)
        # Kept on the result so cleanup_pack_group() can finish and close them.
        versioned_files.stream = pack_stream
        versioned_files.writer = pack_writer
        return versioned_files
    return factory

921

922

923

def cleanup_pack_group(versioned_files):
    """Finish the pack attached to versioned_files and close its stream.

    :param versioned_files: An object with ``writer`` and ``stream``
        attributes, such as the result of a make_pack_factory() factory.
    """
    try:
        versioned_files.writer.end()
    finally:
        # Release the stream even if finishing the container fails, so the
        # underlying file is never leaked.
        versioned_files.stream.close()

926

927

928

class GroupCompressVersionedFiles(VersionedFiles):

929

"""A group-compress based VersionedFiles implementation."""

930

931

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    # Collaborators supplied by the caller.
    self._index = index
    self._access = access
    self._delta = delta
    # State owned by this object.
    self._fallback_vfs = []
    self._unadded_refs = {}
    group_cache_bytes = 50*1024*1024
    self._group_cache = LRUSizeCache(max_size=group_cache_bytes)

944

945

def add_lines(self, key, parents, lines, parent_texts=None,
    left_matching_blocks=None, nostore_sha=None, random_id=False,
    check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add. None is
        treated as an empty tuple.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \n and contain no
        other \n's. The last line may either contain no \n's or a single
        terminating \n. If the lines list does not meet this constraint the
        add routine may error or may succeed - but you will be unable to
        read the data back accurately. (Checking the lines have been split
        correctly is expensive and extremely unlikely to catch bugs so it
        is not done at runtime unless check_content is True.)
    :param parent_texts: Accepted for interface compatibility; not read
        here.
    :param left_matching_blocks: Accepted for interface compatibility; not
        read here.
    :param nostore_sha: Forwarded to _insert_record_stream, which documents
        it as: raise ExistingContent and do not add the lines if the digest
        of the lines matches this.
    :param random_id: Forwarded to _check_add and _insert_record_stream.
    :param check_content: Forwarded to _check_add.
    :return: The text sha1, the number of bytes in the text, and None (no
        opaque representation is produced).
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data; store an
        # empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_length = sum(map(len, lines))
    new_record = ChunkedContentFactory(key, parents, None, lines)
    inserted_sha1s = list(self._insert_record_stream([new_record],
        random_id=random_id, nostore_sha=nostore_sha))
    return inserted_sha1s[0], text_length, None

994

995

def add_fallback_versioned_files(self, a_versioned_files):
    """Register another source to consult for texts not held here.

    :param a_versioned_files: A VersionedFiles object; it is appended to the
        end of the fallback list.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1001

1002

def annotate(self, key):
    """See VersionedFiles.annotate.

    :param key: The key of the text to annotate.
    :return: The list produced by reannotate() for ``key``.
    :raises errors.RevisionNotPresent: If ``key`` has no entry in the
        parent map.
    """
    graph = Graph(self)
    parent_map = self.get_parent_map([key])
    if not parent_map:
        raise errors.RevisionNotPresent(key, self)
    if parent_map[key] is not None:
        # Graph data is available: collect every present ancestor of key.
        search = graph._make_breadth_first_searcher([key])
        keys = set()
        while True:
            try:
                present, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(present)
        parent_map = self.get_parent_map(keys)
    else:
        # No graph data: annotate the text on its own.
        keys = [key]
        parent_map = {key:()}
    # So we used Graph(self) to load the parent_map, but now that we have
    # it, we can just query the parent map directly, so create a new Graph
    # object
    graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    parent_cache = {}
    reannotate = annotate.reannotate
    for record in self.get_record_stream(keys, 'topological', True):
        # Use a separate name so the requested key is not overwritten; the
        # result below must not depend on which record happens to come last.
        record_key = record.key
        lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parent_lines = [parent_cache[parent]
                        for parent in parent_map[record_key]]
        parent_cache[record_key] = list(
            reannotate(parent_lines, lines, record_key, None, head_cache))
    return parent_cache[key]

1035

1036

def check(self, progress_bar=None):
    """See VersionedFiles.check().

    Requests the fulltext of every key so that any extraction error
    surfaces.

    :param progress_bar: Not read here.
    """
    every_key = self.keys()
    record_stream = self.get_record_stream(every_key, 'unordered', True)
    for record in record_stream:
        record.get_bytes_as('fulltext')

1041

1042

def _check_add(self, key, lines, random_id, check_content):

1043

"""check that version_id and lines are safe to add."""

1044

version_id = key[-1]

1045

if version_id is not None:

1046

if osutils.contains_whitespace(version_id):

1047

raise errors.InvalidRevisionId(version_id, self)

1048

self.check_not_reserved_id(version_id)

1049

# TODO: If random_id==False and the key is already present, we should

1050

# probably check that the existing content is identical to what is

1051

# being inserted, and otherwise raise an exception. This would make

1052

# the bundle code simpler.

1053

if check_content:

1054

self._check_lines_not_unicode(lines)

1055

self._check_lines_are_lines(lines)

1056

1057

def get_parent_map(self, keys):
    """Look up the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    map_and_sources = self._get_parent_map_with_sources(keys)
    # Only the combined mapping is wanted, not the per-source breakdown.
    return map_and_sources[0]

1065

1066

def _get_parent_map_with_sources(self, keys):

1067

"""Get a map of the parents of keys.

1068

1069

:param keys: The keys to look up parents for.

1070

:return: A tuple. The first element is a mapping from keys to parents.

1071

Absent keys are absent from the mapping. The second element is a

1072

list with the locations each key was found in. The first element

1073

is the in-this-knit parents, the second the first fallback source,

1074

and so on.

1075

"""

1076

result = {}

1077

sources = [self._index] + self._fallback_vfs

1078

source_results = []

1079

missing = set(keys)

1080

for source in sources:

1081

if not missing:

1082

break

1083

new_result = source.get_parent_map(missing)

1084

source_results.append(new_result)

1085

result.update(new_result)

1086

missing.difference_update(set(new_result))

1087

return result, source_results

1088

1089

def _get_block(self, index_memo):

1090

read_memo = index_memo[0:3]

1091

# get the group:

1092

try:

1093

block = self._group_cache[read_memo]

1094

except KeyError:

1095

# read the group

1096

zdata = self._access.get_raw_records([read_memo]).next()

1097

# decompress - whole thing - this is not a bug, as it

1098

# permits caching. We might want to store the partially

1099

# decompresed group and decompress object, so that recent

1100

# texts are not penalised by big groups.

1101

block = GroupCompressBlock.from_bytes(zdata)

1102

self._group_cache[read_memo] = block

1103

# cheapo debugging:

1104

# print len(zdata), len(plain)

1105

# parse - requires split_lines, better to have byte offsets

1106

# here (but not by much - we only split the region for the

1107

# recipe, and we often want to end up with lines anyway.

1108

return block

1109

1110

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or a index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so there is never anything to report.
    nothing_missing = frozenset()
    return nothing_missing

1119

1120

def get_record_stream(self, keys, ordering, include_delta_closure):

1121

"""Get a stream of records for keys.

1122

1123

:param keys: The keys to include.

1124

:param ordering: Either 'unordered' or 'topological'. A topologically

1125

sorted stream has compression parents strictly before their

1126

children.

1127

:param include_delta_closure: If True then the closure across any

1128

compression parents will be included (in the opaque data).

1129

:return: An iterator of ContentFactory objects, each of which is only

1130

valid until the iterator is advanced.

1131

"""

1132

# keys might be a generator

1133

orig_keys = list(keys)

1134

keys = set(keys)

1135

if not keys:

1136

return

1137

if (not self._index.has_graph

1138

and ordering in ('topological', 'groupcompress')):

1139

# Cannot topological order when no graph has been stored.

1140

# but we allow 'as-requested' or 'unordered'

1141

ordering = 'unordered'

1142

1143

remaining_keys = keys

1144

while True:

1145

try:

1146

keys = set(remaining_keys)

1147

for content_factory in self._get_remaining_record_stream(keys,

1148

orig_keys, ordering, include_delta_closure):

1149

remaining_keys.discard(content_factory.key)

1150

yield content_factory

1151

return

1152

except errors.RetryWithNewPacks, e:

1153

self._access.reload_or_raise(e)

1154

1155

def _find_from_fallback(self, missing):

1156

"""Find whatever keys you can from the fallbacks.

1157

1158

:param missing: A set of missing keys. This set will be mutated as keys

1159

are found from a fallback_vfs

1160

:return: (parent_map, key_to_source_map, source_results)

1161

parent_map the overall key => parent_keys

1162

key_to_source_map a dict from {key: source}

1163

source_results a list of (source: keys)

1164

"""

1165

parent_map = {}

1166

key_to_source_map = {}

1167

source_results = []

1168

for source in self._fallback_vfs:

1169

if not missing:

1170

break

1171

source_parents = source.get_parent_map(missing)

1172

parent_map.update(source_parents)

1173

source_parents = list(source_parents)

1174

source_results.append((source, source_parents))

1175

key_to_source_map.update((key, source) for key in source_parents)

1176

missing.difference_update(source_parents)

1177

return parent_map, key_to_source_map, source_results

1178

1179

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The keys are first sorted as a whole according to 'ordering' and then
    cut into runs of consecutive keys that share a source, so the result can
    weave between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: The key => parents mapping that is sorted.
    :param key_to_source_map: Maps keys held by a fallback to that fallback;
        keys not in it are attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering == 'topological':
        ordered_keys = topo_sort(parent_map)
    else:
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        ordered_keys = sort_gc_optimal(parent_map)
    # Now group by source:
    runs = []
    run_source = None
    for key in ordered_keys:
        owner = key_to_source_map.get(key, self)
        if owner is not run_source:
            # The source changed, so start a new run.
            runs.append((owner, []))
            run_source = owner
        runs[-1][1].append(key)
    return runs

1207

1208

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1209

key_to_source_map):

1210

source_keys = []

1211

current_source = None

1212

for key in orig_keys:

1213

if key in locations or key in unadded_keys:

1214

source = self

1215

elif key in key_to_source_map:

1216

source = key_to_source_map[key]

1217

else: # absent

1218

continue

1219

if source is not current_source:

1220

source_keys.append((source, []))

1221

current_source = source

1222

source_keys[-1][1].append(key)

1223

return source_keys

1224

1225

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1226

source_result):

1227

def get_group(key):

1228

# This is the group the bytes are stored in, followed by the

1229

# location in the group

1230

return locations[key][0]

1231

present_keys = sorted(locations.iterkeys(), key=get_group)

1232

# We don't have an ordering for keys in the in-memory object, but

1233

# lets process the in-memory ones first.

1234

present_keys = list(unadded_keys) + present_keys

1235

# Now grab all of the ones from other sources

1236

source_keys = [(self, present_keys)]

1237

source_keys.extend(source_result)

1238

return source_keys

1239

1240

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    Absent keys are reported first. After that, records held by this object
    are batched per group block and a batch is emitted whenever the block
    changes, an unadded text is reached, or a fallback source takes over.

    :param keys: The keys to include.
    :param orig_keys: The keys in the order originally requested; only used
        for 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = {}
        for indexed_key, details in locations.iteritems():
            parent_map[indexed_key] = details[2]
        for unadded_key in unadded_keys:
            parent_map[unadded_key] = self._unadded_refs[unadded_key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    elif ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    # Whatever is still missing after the fallbacks is absent everywhere.
    for absent_key in missing:
        yield AbsentContentFactory(absent_key)
    manager = None
    last_read_memo = None
    # TODO: This works fairly well at batching up existing groups into a
    #       streamable format, and possibly allowing for taking one big
    #       group and splitting it when it isn't fully utilized.
    #       However, it doesn't allow us to find under-utilized groups and
    #       combine them into a bigger group on the fly.
    #       (Consider the issue with how chk_map inserts texts
    #       one-at-a-time.) This could be done at insert_record_stream()
    #       time, but it probably would decrease the number of
    #       bytes-on-the-wire for fetch.
    for source, run_keys in source_keys:
        if source is not self:
            # A fallback takes over: emit what has been batched so far,
            # then hand the whole run to that source.
            if manager is not None:
                for factory in manager.get_record_stream():
                    yield factory
                last_read_memo = manager = None
            for record in source.get_record_stream(run_keys, ordering,
                                                   include_delta_closure):
                yield record
            continue
        for key in run_keys:
            if key in self._unadded_refs:
                # Not yet written to a block: extract from the compressor.
                if manager is not None:
                    for factory in manager.get_record_stream():
                        yield factory
                    last_read_memo = manager = None
                bytes, sha1 = self._compressor.extract(key)
                parents = self._unadded_refs[key]
                yield FulltextContentFactory(key, parents, sha1, bytes)
                continue
            index_memo, _, parents, (method, _) = locations[key]
            read_memo = index_memo[0:3]
            if last_read_memo != read_memo:
                # We are starting a new block. If we have a
                # manager, we have found everything that fits for
                # now, so yield records
                if manager is not None:
                    for factory in manager.get_record_stream():
                        yield factory
                # Now start a new manager
                block = self._get_block(index_memo)
                manager = _LazyGroupContentManager(block)
                last_read_memo = read_memo
            start, end = index_memo[3:5]
            manager.add_factory(key, parents, start, end)
    # Emit whatever the final manager is still holding.
    if manager is not None:
        for factory in manager.get_record_stream():
            yield factory

1329

1330

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping each key that is present to its sha1. The
        record's own sha1 is used when it has one; otherwise the sha1 is
        computed from the fulltext. Absent keys are left out.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # Identity test: None is a singleton, and "!=" could be overridden.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1341

1342

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserted = self._insert_record_stream(stream, random_id=False)
    # The sha1s themselves are not needed; the iterator just has to be run
    # to completion for the insert to happen.
    for unused_sha1 in inserted:
        pass

1355

1356

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated, and the
    final group is only flushed once the stream has been exhausted.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to the index's add_records(). When
        True, a record whose key was already seen in this stream is skipped
        with a trace note.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records. Records
        inserted by block reuse do not produce an entry.
    :raises errors.RevisionNotPresent: If the stream contains an absent
        record.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    # Adapters are created on demand and reused for the rest of the stream.
    adapters = {}
    def get_adapter(adapter_key):
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    # (key, 'start end', refs) for texts in the group currently being built.
    keys_to_add = []
    def flush():
        # Write out the current group, index every text in it, and start a
        # fresh compressor for the next group.
        bytes = self._compressor.flush().to_bytes()
        index, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                insert_manager._check_rebuild_block()
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                # Remembered so that following 'groupcompress-block-ref'
                # records can point into the block just written.
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            # Fall back to an adapter from the record's storage kind.
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         type) = self._compressor.compress(record.key,
                                           bytes, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            # The group has grown past 4*1024*1024.
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            # The prefix changed and the group is already past 2*1024*1024.
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take this text back out, write the group without it, and
            # compress it again as the first text of the next group.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point,
             type) = self._compressor.compress(record.key, bytes,
                                               record.sha1)
        if record.key[-1] is None:
            # No final key element was supplied: derive it from the sha1.
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
            (record.parents,)))
    if len(keys_to_add):
        flush()
    self._compressor = None

1494

1495

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    Each item the returned iterator yields is a tuple of a line and a text
    version that that line is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines come from osutils.split_lines() applied to each fulltext.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose texts should be walked.
    :param pb: An optional progress bar; a DummyProgress is used if None.
    :return: An iterator over (line, key).
    :raises errors.RevisionNotPresent: If one of the keys is absent.
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    records_seen = 0
    for record in self.get_record_stream(keys, 'unordered', True):
        # XXX: todo - optimise to use less than full texts.
        key = record.key
        pb.update('Walking content', records_seen, total)
        records_seen += 1
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(key, self)
        for line in osutils.split_lines(record.get_bytes_as('fulltext')):
            yield line, key
    pb.update('Walking content', total, total)

1534

1535

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of the index and of every fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1544

1545

1546

class _GCGraphIndex(object):

1547

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1548

1549

def __init__(self, graph_index, is_locked, parents=True,

1550

add_callback=None, track_external_parent_refs=False):

1551

"""Construct a _GCGraphIndex on a graph_index.

1552

1553

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1554

:param is_locked: A callback, returns True if the index is locked and

1555

thus usable.

1556

:param parents: If True, record knits parents, if not do not record

1557

parents.

1558

:param add_callback: If not None, allow additions to the index and call

1559

this callback with a list of added GraphIndex nodes:

1560

[(node, value, node_refs), ...]

1561

:param track_external_parent_refs: As keys are added, keep track of the

1562

keys they reference, so that we can query get_missing_parents(),

1563

etc.

1564

"""

1565

self._add_callback = add_callback

1566

self._graph_index = graph_index

1567

self._parents = parents

1568

self.has_graph = parents

1569

self._is_locked = is_locked

1570

if track_external_parent_refs:

1571

self._key_dependencies = knit._KeyRefs()

1572

else:

1573

self._key_dependencies = None

1574

1575

def add_records(self, records, random_id=False):
    """Add multiple records to the index.

    This function does not insert data into the Immutable GraphIndex
    backing the KnitGraphIndex, instead it prepares data for insertion by
    the caller and checks that it is safe to insert then calls
    self._add_callback with the prepared GraphIndex nodes.

    :param records: a list of tuples:
                     (key, value, refs).
    :param random_id: If True the ids being added were randomly generated
        and no check for existence will be performed.
    :raises errors.ReadOnlyError: If no add callback was supplied.
    :raises errors.KnitCorrupt: If a record carries parents but this index
        stores none, or if a key is already present with different
        references.
    """
    if not self._add_callback:
        raise errors.ReadOnlyError(self)
    # we hope there are no repositories with inconsistent parentage
    # anymore.

    # Set once the records have to be rebuilt before being handed on.
    changed = False
    keys = {}
    for (key, value, refs) in records:
        if not self._parents:
            if refs:
                for ref in refs:
                    if ref:
                        raise errors.KnitCorrupt(self,
                            "attempt to add node with parents "
                            "in parentless index.")
                refs = ()
                changed = True
        keys[key] = (value, refs)
    # check for dups
    if not random_id:
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if node_refs != keys[key][1]:
                raise errors.KnitCorrupt(self, "inconsistent details in add_records"
                    ": %s %s" % ((value, node_refs), keys[key]))
            # Already present with the same references: do not add it again.
            del keys[key]
            changed = True
    if changed:
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            # A parentless index takes nodes without a references element.
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        records = result
    key_dependencies = self._key_dependencies
    if key_dependencies is not None and self._parents:
        for key, value, refs in records:
            parents = refs[0]
            key_dependencies.add_references(key, parents)
    self._add_callback(records)

1630

1631

def _check_read(self):

1632

"""Raise an exception if reads are not permitted."""

1633

if not self._is_locked():

1634

raise errors.ObjectNotLocked(self)

1635

1636

def _check_write_ok(self):

1637

"""Raise an exception if writes are not permitted."""

1638

if not self._is_locked():

1639

raise errors.ObjectNotLocked(self)

1640

1641

def _get_entries(self, keys, check_present=False):

1642

"""Get the entries for keys.

1643

1644

Note: Callers are responsible for checking that the index is locked

1645

before calling this method.

1646

1647

:param keys: An iterable of index key tuples.

1648

"""

1649

keys = set(keys)

1650

found_keys = set()

1651

if self._parents:

1652

for node in self._graph_index.iter_entries(keys):

1653

yield node

1654

found_keys.add(node[1])

1655

else:

1656

# adapt parentless index to the rest of the code.

1657

for node in self._graph_index.iter_entries(keys):

1658

yield node[0], node[1], node[2], ()

1659

found_keys.add(node[1])

1660

if check_present:

1661

missing_keys = keys.difference(found_keys)

1662

if missing_keys:

1663

raise RevisionNotPresent(missing_keys.pop(), self)

1664

1665

def get_parent_map(self, keys):
    """Build a mapping from each present key to its parents.

    :param keys: The keys to look up parents for.
    :return: A dict keyed by the keys found in the index. Keys that are
        not found are left out. When the index records parents the value
        is the first reference list of the entry; otherwise it is None.
    """
    self._check_read()
    entries = self._get_entries(keys)
    if self._parents:
        return dict((entry[1], entry[3][0]) for entry in entries)
    # A parentless index has no parent information to report.
    return dict((entry[1], None) for entry in entries)

1682

1683

def get_missing_parents(self):
    """Return the keys of missing parents.

    :return: A frozenset of the references that self._key_dependencies
        still reports as unsatisfied after the index has been consulted.
    """
    # Copied from _KnitGraphIndex.get_missing_parents
    dependencies = self._key_dependencies
    # We may have false positives, so filter those out: any unsatisfied
    # reference that the index can actually find is registered as a key.
    candidates = dependencies.get_unsatisfied_refs()
    present = self.get_parent_map(candidates)
    dependencies.add_keys(present)
    return frozenset(dependencies.get_unsatisfied_refs())

1690

1691

def get_build_details(self, keys):
    """Look up the build details for each of keys.

    Keys that are not found in the index (ghosts) do not appear in the
    result.

    :param keys: An iterable of keys.
    :return: A dict mapping each found key to a tuple
        (index_memo, compression_parent, parents, record_details).
        index_memo
            opaque structure to pass to read_records to extract the raw
            data; produced here by self._node_to_position
        compression_parent
            Content that this record is built upon; always None here
        parents
            Logical parents of this node, or None when the index does
            not record parents
        record_details
            extra information about the content which needs to be passed
            to Factory.parse_record; always ('group', None) here
    """
    self._check_read()
    record_details = ('group', None)
    details = {}
    for node in self._get_entries(keys):
        key = node[1]
        if self._parents:
            parents = node[3][0]
        else:
            parents = None
        index_memo = self._node_to_position(node)
        details[key] = (index_memo, None, parents, record_details)
    return details

1723

1724

def keys(self):
    """Return every key held in the underlying graph index.

    The keys come back as a list in whatever order the index iterates
    its entries; no ordering is applied here.
    """
    self._check_read()
    collected = []
    for entry in self._graph_index.iter_all_entries():
        # The key is the second element of each index entry.
        collected.append(entry[1])
    return collected

1731

1732

def _node_to_position(self, node):

1733

"""Convert an index value to position details."""

1734

bits = node[2].split(' ')

1735

# It would be nice not to read the entire gzip.

1736

start = int(bits[0])

1737

stop = int(bits[1])

1738

basis_end = int(bits[2])

1739

delta_end = int(bits[3])

1740

return node[0], start, stop, basis_end, delta_end

1741

1742

def scan_unvalidated_index(self, graph_index):
    """Tell this _GCGraphIndex about an index that is not yet validated.

    Every entry of graph_index has its first reference list recorded in
    self._key_dependencies, so that references which are still missing
    can be tracked and filled in later to make the index valid. Nothing
    is done when no key dependency tracker is configured.

    :param graph_index: A GraphIndex
    """
    tracker = self._key_dependencies
    if tracker is None:
        return
    # Add parent refs from graph_index (and discard parent refs that
    # the graph_index has).
    record_references = tracker.add_references
    for entry in graph_index.iter_all_entries():
        record_references(entry[1], entry[3][0])

1757

1758

1759

1760

# Start from the pure-Python implementations so that every helper name is
# always defined.
from bzrlib._groupcompress_py import (
    apply_delta,
    apply_delta_to_source,
    encode_base128_int,
    decode_base128_int,
    decode_copy_instruction,
    LinesDeltaIndex,
    )
# Then, when the compiled extension can be imported, let its versions
# replace the pure-Python ones and select the matching compressor class.
try:
    from bzrlib._groupcompress_pyx import (
        apply_delta,
        apply_delta_to_source,
        DeltaIndex,
        encode_base128_int,
        decode_base128_int,
        )
except ImportError:
    GroupCompressor = PythonGroupCompressor
else:
    GroupCompressor = PyrexGroupCompressor

1779

Older »