~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Martin Pool
Date: 2005-07-22 22:37:53 UTC
Revision ID: mbp@sourcefrog.net-20050722223753-7dced4e32d3ce21d

- add the start of a test for inventory file-id matching

files added:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/new-inventory-sample.xml

notes/performance.txt

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

plugins/changeset

plugins/changeset/__init__.py

plugins/changeset/apply_changeset.py

plugins/changeset/common.py

plugins/changeset/gen_changeset.py

plugins/changeset/read_changeset.py

plugins/checkperms

testbzr

testsweet.py

urlgrabber

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/keepalive.py

urlgrabber/mirror.py

urlgrabber/progress.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

Makefile

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.h

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/inventory_delta.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/emacs

contrib/emacs/bzr-mode.el

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/HACKING.txt

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/check-newsbugs.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revisionspec.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/util/effbot/ => effbot/

bzrlib/util/elementtree/ => elementtree/

bzrlib/plugins/ => plugins/

bzrlib/tests/test_weave.py => tools/testweave.py

files modified:
.bzrignore

.rsyncexclude

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/__init__.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwclient.full

contrib/pwk

contrib/zsh/_bzr

elementtree/ElementTree.py

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

from itertools import izip

from cStringIO import StringIO

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

diff,

errors,

graph as _mod_graph,

osutils,

pack,

patiencediff,

trace,

)

from bzrlib.graph import Graph

from bzrlib.knit import _DirectPackAccess

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# LZMA output is switched off: the leading `False` short-circuits the
# expression, so this is always False whether or not pylzma was importable.
_USE_LZMA = False and (pylzma is not None)

# SHA-1 hex digest of the empty string, i.e. osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map for group compression.

    Keys are bucketed by prefix (the first element of a multi-element key;
    plain strings and 1-tuples share the '' bucket).  Buckets are visited in
    sorted prefix order, and within a bucket the keys come out in the reverse
    of the order produced by topo_sort.

    :param parent_map: A dict mapping each key to its parents.
    :return: A sorted-list of keys
    """
    grouped = {}
    for entry in parent_map.iteritems():
        key = entry[0]
        if not isinstance(key, str) and len(key) != 1:
            prefix = key[0]
        else:
            prefix = ''
        # Keep the whole (key, parents) pair: topo_sort wants both.
        grouped.setdefault(prefix, []).append(entry)

    ordered = []
    for prefix in sorted(grouped):
        ordered.extend(reversed(topo_sort(grouped[prefix])))
    return ordered

# The max zlib window size is 32kB.  If the 'max_size' handed to the
# decompressor is the requested number of bytes plus this much slack, then we
# should be guaranteed to get at least the requested num_bytes coming out.
_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The serialised form written by to_bytes() and read by from_bytes() is a
    6 byte header naming the compressor, the compressed length and the
    uncompressed length as ascii decimal integers (each terminated by a
    newline), followed by the compressed content.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        # map by key? or just order in file?
        # Name of the compressor that produced _z_content ('zlib' / 'lzma')
        self._compressor_name = None
        # The compressed content, and a decompressobj used when it has only
        # been partially expanded.
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        # The (possibly partial) uncompressed content and its full length.
        self._content_length = None
        self._content = None

    def __len__(self):
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: if more bytes are requested than the block
            claims to hold, if there is nothing to decompress, if the
            compressor is unknown, or if the stream ends before num_bytes
            could be produced.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            # May still be None if the total length is not known yet.
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The complete serialised block.
        :param pos: Offset in bytes of the first length field.
        :return: None; _z_content_length, _content_length and _z_content are
            set as a side effect.
        :raises ValueError: (from str.index / int) if a length line is
            missing or is not an integer.
        :raises AssertionError: if the compressed length does not account
            for exactly the remaining bytes.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string as produced by to_bytes().
        :return: A new block holding the still-compressed content.
        :raises ValueError: if bytes does not start with a known header.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content (not consulted here).
        :param start: Offset in the uncompressed content of the record's
            first byte (its 'f'/'d' type byte).
        :param end: Offset of the byte just after the end of the record.
        :param sha1: TODO (should we validate only when sha1 is supplied?)
            Currently unused.
        :return: The bytes for the content
        :raises ValueError: if the record type byte is unknown, or if the
            length stored in the record header disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c != 'f' and c != 'd':
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                             ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            text = self._content[content_start:end]
        else:
            text = apply_delta_to_source(self._content, content_start, end)
        return text

    def set_content(self, content):
        """Set the content of this block.

        Any previously held compressed form is discarded; it is regenerated
        by the next to_bytes() call.
        """
        self._content_length = len(content)
        self._content = content
        self._z_content = None

    def to_bytes(self):
        """Encode the information into a byte stream.

        The content is compressed first if no compressed form is held yet.
        The header always names the compressor that actually produced the
        payload, so a block parsed by from_bytes() serialises back to the
        same header it was read with.

        :raises AssertionError: if there is neither compressed nor
            uncompressed content.
        """
        if self._z_content is None:
            if self._content is None:
                raise AssertionError('Nothing to compress')
            if _USE_LZMA:
                self._z_content = pylzma.compress(self._content)
                self._compressor_name = 'lzma'
            else:
                self._z_content = zlib.compress(self._content)
                self._compressor_name = 'zlib'
            self._z_content_length = len(self._z_content)
        compressor_name = self._compressor_name
        if compressor_name is None:
            # The payload was installed without recording its compressor;
            # fall back to the module-wide default.
            if _USE_LZMA:
                compressor_name = 'lzma'
            else:
                compressor_name = 'zlib'
        if compressor_name == 'lzma':
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: Inserts also include text bits, chose whether you
            want this displayed in the dump or not.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: if a record has an unknown type byte, claims more
            bytes than the block holds, or a delta is internally inconsistent.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                                     ' extraction resulted in %d bytes'
                                     % (decomp_len, measured_len))
            pos += content_len
        return result

361

362

363

class _LazyGroupCompressFactory(object):

364

"""Yield content from a GroupCompressBlock on demand."""

365

366

def __init__(self, key, parents, manager, start, end, first):

367

"""Create a _LazyGroupCompressFactory

368

369

:param key: The key of just this record

370

:param parents: The parents of this key (possibly None)

371

:param gc_block: A GroupCompressBlock object

372

:param start: Offset of the first byte for this record in the

373

uncompressd content

374

:param end: Offset of the byte just after the end of this record

375

(ie, bytes = content[start:end])

376

:param first: Is this the first Factory for the given block?

377

"""

378

self.key = key

379

self.parents = parents

380

self.sha1 = None

381

# Note: This attribute coupled with Manager._factories creates a

382

# reference cycle. Perhaps we would rather use a weakref(), or

383

# find an appropriate time to release the ref. After the first

384

# get_bytes_as call? After Manager.get_record_stream() returns

385

# the object?

386

self._manager = manager

387

self._bytes = None

388

self.storage_kind = 'groupcompress-block'

389

if not first:

390

self.storage_kind = 'groupcompress-block-ref'

391

self._first = first

392

self._start = start

393

self._end = end

394

395

def __repr__(self):

396

return '%s(%s, first=%s)' % (self.__class__.__name__,

397

self.key, self._first)

398

399

def get_bytes_as(self, storage_kind):

400

if storage_kind == self.storage_kind:

401

if self._first:

402

# wire bytes, something...

403

return self._manager._wire_bytes()

404

else:

405

return ''

406

if storage_kind in ('fulltext', 'chunked'):

407

if self._bytes is None:

408

# Grab and cache the raw bytes for this entry

409

# and break the ref-cycle with _manager since we don't need it

410

# anymore

411

self._manager._prepare_for_extract()

412

block = self._manager._block

413

self._bytes = block.extract(self.key, self._start, self._end)

414

# There are code paths that first extract as fulltext, and then

415

# extract as storage_kind (smart fetch). So we don't break the

416

# refcycle here, but instead in manager.get_record_stream()

417

# self._manager = None

418

if storage_kind == 'fulltext':

419

return self._bytes

420

else:

421

return [self._bytes]

422

raise errors.UnavailableRepresentation(self.key, storage_kind,

423

self.storage_kind)

424

425

426

class _LazyGroupContentManager(object):

427

"""This manages a group of _LazyGroupCompressFactory objects."""

428

429

def __init__(self, block):

430

self._block = block

431

# We need to preserve the ordering

432

self._factories = []

433

self._last_byte = 0

434

435

def add_factory(self, key, parents, start, end):

436

if not self._factories:

437

first = True

438

else:

439

first = False

440

# Note that this creates a reference cycle....

441

factory = _LazyGroupCompressFactory(key, parents, self,

442

start, end, first=first)

443

# max() works here, but as a function call, doing a compare seems to be

444

# significantly faster, timeit says 250ms for max() and 100ms for the

445

# comparison

446

if end > self._last_byte:

447

self._last_byte = end

448

self._factories.append(factory)

449

450

def get_record_stream(self):

451

"""Get a record for all keys added so far."""

452

for factory in self._factories:

453

yield factory

454

# Break the ref-cycle

455

factory._bytes = None

456

factory._manager = None

457

# TODO: Consider setting self._factories = None after the above loop,

458

# as it will break the reference cycle

459

460

def _trim_block(self, last_byte):

461

"""Create a new GroupCompressBlock, with just some of the content."""

462

# None of the factories need to be adjusted, because the content is

463

# located in an identical place. Just that some of the unreferenced

464

# trailing bytes are stripped

465

trace.mutter('stripping trailing bytes from groupcompress block'

466

' %d => %d', self._block._content_length, last_byte)

467

new_block = GroupCompressBlock()

468

self._block._ensure_content(last_byte)

469

new_block.set_content(self._block._content[:last_byte])

470

self._block = new_block

471

472

def _rebuild_block(self):

473

"""Create a new GroupCompressBlock with only the referenced texts."""

474

compressor = GroupCompressor()

475

tstart = time.time()

476

old_length = self._block._content_length

477

end_point = 0

478

for factory in self._factories:

479

bytes = factory.get_bytes_as('fulltext')

480

(found_sha1, start_point, end_point,

481

type) = compressor.compress(factory.key, bytes, factory.sha1)

482

# Now update this factory with the new offsets, etc

483

factory.sha1 = found_sha1

484

factory._start = start_point

485

factory._end = end_point

486

self._last_byte = end_point

487

new_block = compressor.flush()

488

# TODO: Should we check that new_block really *is* smaller than the old

489

# block? It seems hard to come up with a method that it would

490

# expand, since we do full compression again. Perhaps based on a

491

# request that ends up poorly ordered?

492

delta = time.time() - tstart

493

self._block = new_block

494

trace.mutter('creating new compressed block on-the-fly in %.3fs'

495

' %d bytes => %d bytes', delta, old_length,

496

self._block._content_length)

497

498

def _prepare_for_extract(self):

499

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

500

# We expect that if one child is going to fulltext, all will be. This

501

# helps prevent all of them from extracting a small amount at a time.

502

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

503

# time (self._block._content) is a little expensive.

504

self._block._ensure_content(self._last_byte)

505

506

def _check_rebuild_block(self):

507

"""Check to see if our block should be repacked."""

508

total_bytes_used = 0

509

last_byte_used = 0

510

for factory in self._factories:

511

total_bytes_used += factory._end - factory._start

512

last_byte_used = max(last_byte_used, factory._end)

513

# If we are using most of the bytes from the block, we have nothing

514

# else to check (currently more that 1/2)

515

if total_bytes_used * 2 >= self._block._content_length:

516

return

517

# Can we just strip off the trailing bytes? If we are going to be

518

# transmitting more than 50% of the front of the content, go ahead

519

if total_bytes_used * 2 > last_byte_used:

520

self._trim_block(last_byte_used)

521

return

522

523

# We are using a small amount of the data, and it isn't just packed

524

# nicely at the front, so rebuild the content.

525

# Note: This would be *nicer* as a strip-data-from-group, rather than

526

# building it up again from scratch

527

# It might be reasonable to consider the fulltext sizes for

528

# different bits when deciding this, too. As you may have a small

529

# fulltext, and a trivial delta, and you are just trading around

530

# for another fulltext. If we do a simple 'prune' you may end up

531

# expanding many deltas into fulltexts, as well.

532

# If we build a cheap enough 'strip', then we could try a strip,

533

# if that expands the content, we then rebuild.

534

self._rebuild_block()

535

536

def _wire_bytes(self):

537

"""Return a byte stream suitable for transmitting over the wire."""

538

self._check_rebuild_block()

539

# The outer block starts with:

540

# 'groupcompress-block\n'

541

# <length of compressed key info>\n

542

# <length of uncompressed info>\n

543

# <length of gc block>\n

544

# <header bytes>

545

# <gc-block>

546

lines = ['groupcompress-block\n']

547

# The minimal info we need is the key, the start offset, and the

548

# parents. The length and type are encoded in the record itself.

549

# However, passing in the other bits makes it easier. The list of

550

# keys, and the start offset, the length

551

# 1 line key

552

# 1 line with parents, '' for ()

553

# 1 line for start offset

554

# 1 line for end byte

555

header_lines = []

556

for factory in self._factories:

557

key_bytes = '\x00'.join(factory.key)

558

parents = factory.parents

559

if parents is None:

560

parent_bytes = 'None:'

561

else:

562

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

563

record_header = '%s\n%s\n%d\n%d\n' % (

564

key_bytes, parent_bytes, factory._start, factory._end)

565

header_lines.append(record_header)

566

# TODO: Can we break the refcycle at this point and set

567

# factory._manager = None?

568

header_bytes = ''.join(header_lines)

569

del header_lines

570

header_bytes_len = len(header_bytes)

571

z_header_bytes = zlib.compress(header_bytes)

572

del header_bytes

573

z_header_bytes_len = len(z_header_bytes)

574

block_bytes = self._block.to_bytes()

575

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

576

len(block_bytes)))

577

lines.append(z_header_bytes)

578

lines.append(block_bytes)

579

del z_header_bytes, block_bytes

580

return ''.join(lines)

581

582

@classmethod

583

def from_bytes(cls, bytes):

584

# TODO: This does extra string copying, probably better to do it a

585

# different way

586

(storage_kind, z_header_len, header_len,

587

block_len, rest) = bytes.split('\n', 4)

588

del bytes

589

if storage_kind != 'groupcompress-block':

590

raise ValueError('Unknown storage kind: %s' % (storage_kind,))

591

z_header_len = int(z_header_len)

592

if len(rest) < z_header_len:

593

raise ValueError('Compressed header len shorter than all bytes')

594

z_header = rest[:z_header_len]

595

header_len = int(header_len)

596

header = zlib.decompress(z_header)

597

if len(header) != header_len:

598

raise ValueError('invalid length for decompressed bytes')

599

del z_header

600

block_len = int(block_len)

601

if len(rest) != z_header_len + block_len:

602

raise ValueError('Invalid length for block')

603

block_bytes = rest[z_header_len:]

604

del rest

605

# So now we have a valid GCB, we just need to parse the factories that

606

# were sent to us

607

header_lines = header.split('\n')

608

del header

609

last = header_lines.pop()

610

if last != '':

611

raise ValueError('header lines did not end with a trailing'

612

' newline')

613

if len(header_lines) % 4 != 0:

614

raise ValueError('The header was not an even multiple of 4 lines')

615

block = GroupCompressBlock.from_bytes(block_bytes)

616

del block_bytes

617

result = cls(block)

618

for start in xrange(0, len(header_lines), 4):

619

# intern()?

620

key = tuple(header_lines[start].split('\x00'))

621

parents_line = header_lines[start+1]

622

if parents_line == 'None:':

623

parents = None

624

else:

625

parents = tuple([tuple(segment.split('\x00'))

626

for segment in parents_line.split('\t')

627

if segment])

628

start_offset = int(header_lines[start+2])

629

end_offset = int(header_lines[start+3])

630

result.add_factory(key, parents, start_offset, end_offset)

631

return result

632

633

634

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a stream of records.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised block, as understood by
        _LazyGroupContentManager.from_bytes.
    :param line_end: Not used by this function.
    :return: The record stream of the manager rebuilt from bytes.
    :raises ValueError: if storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

639

640

641

class _CommonGroupCompressor(object):

642

643

def __init__(self):
    """Create a GroupCompressor."""
    # The block that flush() fills in and hands back.
    self._block = GroupCompressBlock()
    # Output accumulated so far; extract() looks records up in it through
    # labels_deltas, which maps a key to
    # (start_byte, start_chunk, end_byte, end_chunk).
    self.chunks = []
    self.labels_deltas = {}
    self.endpoint = 0
    self.input_bytes = 0
    # State consulted by pop_last() to undo the most recent compression.
    self._last = None
    self._delta_index = None # Set by the children

652

653

def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
    """Compress lines with label key.

    :param key: A key tuple. It is stored in the output
        for identification of the text during decompression. If the last
        element is 'None' it is replaced with the sha1 of the text -
        e.g. sha1:xxxxxxx.
    :param bytes: The bytes to be compressed
    :param expected_sha: If non-None, the sha the lines are believed to
        have. It is trusted as given: no sha is computed and no
        verification against the content is done here.
    :param nostore_sha: If the computed sha1 sum matches, we will raise
        ExistingContent rather than adding the text.
    :param soft: Do a 'soft' compression. This means that we require larger
        ranges to match to be considered for a copy command.

    :return: The sha1 of lines, the start and end offsets in the delta, and
        the type ('fulltext' or 'delta').  Empty content is never stored:
        it is reported as a 'fulltext' at offsets 0, 0.
    :raises errors.ExistingContent: if the sha1 of the content equals
        nostore_sha.

    :seealso VersionedFiles.add_lines:
    """
    if not bytes: # empty, like a dir entry, etc
        if nostore_sha == _null_sha1:
            raise errors.ExistingContent()
        return _null_sha1, 0, 0, 'fulltext'
    # we assume someone knew what they were doing when they passed it in
    if expected_sha is not None:
        sha1 = expected_sha
    else:
        sha1 = osutils.sha_string(bytes)
    if nostore_sha is not None:
        if sha1 == nostore_sha:
            raise errors.ExistingContent()
    if key[-1] is None:
        key = key[:-1] + ('sha1:' + sha1,)

    # A delta larger than half of the text is not worth keeping.  Floor
    # division keeps the limit an integer even under true division.
    max_delta_size = len(bytes) // 2
    start, end, kind = self._compress(key, bytes, max_delta_size, soft)
    return sha1, start, end, kind

691

692

def _compress(self, key, bytes, max_delta_size, soft=False):

693

"""Compress lines with label key.

694

695

:param key: A key tuple. It is stored in the output for identification

696

of the text during decompression.

697

698

:param bytes: The bytes to be compressed

699

700

:param max_delta_size: The size above which we issue a fulltext instead

701

of a delta.

702

703

:param soft: Do a 'soft' compression. This means that we require larger

704

ranges to match to be considered for a copy command.

705

706

:return: The sha1 of lines, the start and end offsets in the delta, and

707

the type ('fulltext' or 'delta').

708

"""

709

raise NotImplementedError(self._compress)

710

711

def extract(self, key):
    """Rebuild the text stored under a previously compressed key.

    :param key: The key to extract; it must be present in
        self.labels_deltas.
    :return: A (text, sha1) pair, where sha1 is computed from the rebuilt
        text with osutils.sha_string.
    :raises ValueError: If the stored record does not start with 'f' or
        'd', or if the length encoded in its header disagrees with the
        number of bytes actually stored.
    """
    _, first_chunk, _, last_chunk = self.labels_deltas[key]
    stored_bytes = ''.join(self.chunks[first_chunk:last_chunk])
    kind = stored_bytes[0]
    if kind != 'f' and kind != 'd':
        raise ValueError('Unknown content kind, bytes claim %s'
                         % (kind,))
    # Both record kinds carry the same mini-header: the one byte kind
    # marker followed by a base128 encoded payload length.
    payload_len, offset = decode_base128_int(stored_bytes[1:10])
    expected_len = payload_len + 1 + offset
    payload = stored_bytes[offset + 1:]
    if kind == 'f':
        if expected_len != len(stored_bytes):
            raise ValueError('Index claimed fulltext len, but stored bytes'
                             ' claim %s != %s'
                             % (len(stored_bytes), expected_len))
        text = payload
    else:
        if expected_len != len(stored_bytes):
            raise ValueError('Index claimed delta len, but stored bytes'
                             ' claim %s != %s'
                             % (len(stored_bytes), expected_len))
        # XXX: This is inefficient at best
        # The delta is applied against everything stored before this
        # record.
        basis = ''.join(self.chunks[:first_chunk])
        text = apply_delta(basis, payload)
    return text, osutils.sha_string(text)

743

744

def flush(self):
    """Finish this group and hand back the block holding its content.

    All chunks are joined into one string which is stored on self._block
    via set_content(). The chunk list and the delta index are dropped
    first, so the compressor should no longer be used after this call.

    :return: self._block, with its content set.
    """
    joined = ''.join(self.chunks)
    # Release the working state before handing the content over.
    self.chunks = None
    self._delta_index = None
    block = self._block
    block.set_content(joined)
    return block

754

755

def pop_last(self):
    """Revoke the most recent compression.

    The chunk list is truncated and the endpoint restored to the values
    remembered in self._last. The delta index is discarded, so no further
    compression can be done with this object afterwards.
    """
    self._delta_index = None
    remembered = self._last
    # remembered is (number of chunks, endpoint) from before the last add.
    del self.chunks[remembered[0]:]
    self.endpoint = remembered[1]
    self._last = None

765

766

def ratio(self):
    """Return the overall compression ratio.

    This is the number of input bytes divided by the current endpoint
    (the number of bytes output so far), as a float.
    """
    consumed = float(self.input_bytes)
    produced = float(self.endpoint)
    return consumed / produced

769

770

771

class PythonGroupCompressor(_CommonGroupCompressor):

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        line_index = LinesDeltaIndex([])
        self._delta_index = line_index
        # The chunk list is the very list owned by the LinesDeltaIndex, so
        # extending the index also extends self.chunks.
        self.chunks = line_index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        input_len = len(bytes)
        new_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            new_lines, bytes_length=input_len, soft=soft)
        delta_length = sum(len(line) for line in out_lines)
        if delta_length <= max_delta_size:
            # The delta is small enough to be worth storing: fill in the
            # kind marker and the encoded length in the first two slots.
            type = 'delta'
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_length)
        else:
            # The delta exceeds the permitted size, store a fulltext
            # record instead. Only the text lines (not the two header
            # entries) are flagged True in index_lines.
            type = 'fulltext'
            out_lines = ['f', encode_base128_int(input_len)] + new_lines
            index_lines = [False, False] + [True] * len(new_lines)
        # Remember where we were before insertion, so that pop_last() can
        # roll back.
        start = self.endpoint
        chunk_start = len(self.chunks)
        self._last = (chunk_start, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        return start, self.endpoint, type

814

815

816

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting minimal
       edits only.
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        """Create the compressor with a fresh, empty DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        The soft flag is accepted for interface compatibility but is not
        consulted by this implementation.

        :raises AssertionError: If the delta index's _source_offset does
            not equal self.endpoint, either before or after the new record
            has been output.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # make_delta() gave us nothing usable: store the whole text and
            # register it as a new source for later deltas.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # The two literals are concatenated, so the second one needs its
            # leading space to keep the words apart in the message.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, type

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion state in self._last (for pop_last), then
        appends the chunks and advances self.endpoint by their total
        length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

890

891

892

def make_pack_factory(graph, delta, keylength):
    """Build a factory producing pack-backed groupcompress stores.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles writing to 'newpack' on that transport.
        The result carries the open stream and the container writer as its
        'stream' and 'writer' attributes.
    """
    def factory(transport):
        # One reference list (the parents) when a graph is stored, none
        # otherwise.
        if graph:
            ref_length = 1
        else:
            ref_length = 0
        builder = BTreeBuilder(reference_lists=ref_length,
            key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _GCGraphIndex(builder, lambda:True, parents=graph,
            add_callback=builder.add_nodes)
        access = _DirectPackAccess({})
        access.set_writer(writer, builder, (transport, 'newpack'))
        versioned_files = GroupCompressVersionedFiles(index, access, delta)
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

921

922

923

def cleanup_pack_group(versioned_files):
    """Finish the container writer, then close the output stream.

    :param versioned_files: An object carrying 'writer' and 'stream'
        attributes, such as the result of a make_pack_factory() factory.
    """
    # The writer is ended first, the stream closed afterwards.
    versioned_files.writer.end()
    versioned_files.stream.close()

926

927

928

class GroupCompressVersionedFiles(VersionedFiles):

929

"""A group-compress based VersionedFiles implementation."""

930

931

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    # Collaborators supplied by the caller.
    self._index = index
    self._access = access
    self._delta = delta
    # State owned by this object: parents of keys not yet written to the
    # index, a size-bounded cache of groups, and the fallback sources.
    self._unadded_refs = {}
    cache_limit = 50*1024*1024
    self._group_cache = LRUSizeCache(max_size=cache_limit)
    self._fallback_vfs = []

944

945

def add_lines(self, key, parents, lines, parent_texts=None,
    left_matching_blocks=None, nostore_sha=None, random_id=False,
    check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add. None is
        treated as an empty tuple.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with \n and contain no
        other \n's. The last line may either contain no \n's or a single
        terminating \n. If the lines list does not meet this constraint
        the add routine may error or may succeed - but you will be unable
        to read the data back accurately. (Checking the lines have been
        split correctly is expensive and extremely unlikely to catch bugs
        so it is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. VERY IMPORTANT: the texts must be those
        returned by add_lines or data corruption can be caused. (Not
        consulted by this implementation.)
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent. The format is
        the SequenceMatcher.get_matching_blocks format. (Not consulted by
        this implementation.)
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and an opaque
        representation of the inserted version which can be provided
        back to future add_lines calls in the parent_texts dictionary.
        This implementation always returns None for the opaque part.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    length = sum(len(line) for line in lines)
    record = ChunkedContentFactory(key, parents, None, lines)
    sha1s = list(self._insert_record_stream([record], random_id=random_id,
                                            nostore_sha=nostore_sha))
    return sha1s[0], length, None

994

995

def add_fallback_versioned_files(self, a_versioned_files):
    """Register an extra source for texts that are not held here.

    The new source is appended after any fallbacks added earlier.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1001

1002

def annotate(self, key):
    """See VersionedFiles.annotate.

    :param key: The key of the text to annotate.
    :return: The list built from annotate.reannotate() for the last record
        yielded by the 'topological' record stream.
    :raises errors.RevisionNotPresent: if get_parent_map() knows nothing
        about key.
    """
    graph = Graph(self)
    parent_map = self.get_parent_map([key])
    if not parent_map:
        raise errors.RevisionNotPresent(key, self)
    if parent_map[key] is not None:
        # Walk the graph breadth first from key, collecting every key the
        # searcher reports as present (ghosts are ignored).
        search = graph._make_breadth_first_searcher([key])
        keys = set()
        while True:
            try:
                present, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(present)
        parent_map = self.get_parent_map(keys)
    else:
        # No parent information for this key: annotate it on its own.
        keys = [key]
        parent_map = {key:()}
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    # Maps each processed key to its annotated lines, so children can
    # look up their parents' annotations.
    parent_cache = {}
    # Inside a method body the bare name 'annotate' resolves to the
    # module-level name, not to this method.
    reannotate = annotate.reannotate
    for record in self.get_record_stream(keys, 'topological', True):
        # NOTE: this rebinds 'key'; the value returned below is therefore
        # the annotation of the last record in the stream.
        key = record.key
        chunks = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parent_lines = [parent_cache[parent] for parent in parent_map[key]]
        parent_cache[key] = list(
            reannotate(parent_lines, chunks, key, None, head_cache))
    # NOTE(review): presumably the topological order guarantees the
    # requested key comes last - confirm.
    return parent_cache[key]

1031

1032

def check(self, progress_bar=None):
    """See VersionedFiles.check().

    Requests the fulltext of every key in self.keys(); the texts
    themselves are discarded. progress_bar is accepted but not used.
    """
    all_keys = self.keys()
    stream = self.get_record_stream(all_keys, 'unordered', True)
    for record in stream:
        record.get_bytes_as('fulltext')

1037

1038

def _check_add(self, key, lines, random_id, check_content):
    """check that version_id and lines are safe to add.

    :param key: The key tuple being added; only its last element (the
        version id) is inspected here.
    :param lines: The lines of the text; only examined when check_content
        is true.
    :param random_id: Not used by this check.
    :param check_content: If true, run the _check_lines_not_unicode and
        _check_lines_are_lines validators over lines.
    :raises errors.InvalidRevisionId: if the version id contains
        whitespace.
    """
    version_id = key[-1]
    if version_id is not None:
        if osutils.contains_whitespace(version_id):
            raise errors.InvalidRevisionId(version_id, self)
        # NOTE(review): the nesting of this call was reconstructed from a
        # listing that had lost its indentation; confirm whether it
        # belongs inside or after the 'is not None' guard.
        self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception. This would make
    # the bundle code simpler.
    if check_content:
        self._check_lines_not_unicode(lines)
        self._check_lines_are_lines(lines)

1052

1053

def get_parent_map(self, keys):
    """Look up the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined mapping is wanted; the per-source breakdown that
    # comes with it is dropped.
    found = self._get_parent_map_with_sources(keys)
    return found[0]

1061

1062

def _get_parent_map_with_sources(self, keys):

1063

"""Get a map of the parents of keys.

1064

1065

:param keys: The keys to look up parents for.

1066

:return: A tuple. The first element is a mapping from keys to parents.

1067

Absent keys are absent from the mapping. The second element is a

1068

list with the locations each key was found in. The first element

1069

is the in-this-knit parents, the second the first fallback source,

1070

and so on.

1071

"""

1072

result = {}

1073

sources = [self._index] + self._fallback_vfs

1074

source_results = []

1075

missing = set(keys)

1076

for source in sources:

1077

if not missing:

1078

break

1079

new_result = source.get_parent_map(missing)

1080

source_results.append(new_result)

1081

result.update(new_result)

1082

missing.difference_update(set(new_result))

1083

return result, source_results

1084

1085

def _get_block(self, index_memo):

1086

read_memo = index_memo[0:3]

1087

# get the group:

1088

try:

1089

block = self._group_cache[read_memo]

1090

except KeyError:

1091

# read the group

1092

zdata = self._access.get_raw_records([read_memo]).next()

1093

# decompress - whole thing - this is not a bug, as it

1094

# permits caching. We might want to store the partially

1095

# decompresed group and decompress object, so that recent

1096

# texts are not penalised by big groups.

1097

block = GroupCompressBlock.from_bytes(zdata)

1098

self._group_cache[read_memo] = block

1099

# cheapo debugging:

1100

# print len(zdata), len(plain)

1101

# parse - requires split_lines, better to have byte offsets

1102

# here (but not by much - we only split the region for the

1103

# recipe, and we often want to end up with lines anyway.

1104

return block

1105

1106

def get_missing_compression_parent_keys(self):
    """Report the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or a index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    nothing_missing = frozenset()
    return nothing_missing

1115

1116

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    This is a generator. If errors.RetryWithNewPacks is raised while
    streaming, self._access.reload_or_raise() is called and the stream is
    restarted for the keys that have not been yielded yet.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children. 'groupcompress' and 'as-requested' are also handled
        here; when the index has no graph, 'topological' and
        'groupcompress' are downgraded to 'unordered'.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # keys might be a generator
    orig_keys = list(keys)
    keys = set(keys)
    if not keys:
        return
    if (not self._index.has_graph
        and ordering in ('topological', 'groupcompress')):
        # Cannot topological order when no graph has been stored.
        # but we allow 'as-requested' or 'unordered'
        ordering = 'unordered'

    # remaining_keys is the set built above; 'keys' is rebound to a fresh
    # copy for every attempt, so discarding yielded keys here does not
    # disturb the attempt in progress.
    remaining_keys = keys
    while True:
        try:
            keys = set(remaining_keys)
            for content_factory in self._get_remaining_record_stream(keys,
                                        orig_keys, ordering,
                                        include_delta_closure):
                remaining_keys.discard(content_factory.key)
                yield content_factory
            return
        except errors.RetryWithNewPacks, e:
            # Loop round and retry unless reload_or_raise() raises.
            self._access.reload_or_raise(e)

1150

1151

def _find_from_fallback(self, missing):

1152

"""Find whatever keys you can from the fallbacks.

1153

1154

:param missing: A set of missing keys. This set will be mutated as keys

1155

are found from a fallback_vfs

1156

:return: (parent_map, key_to_source_map, source_results)

1157

parent_map the overall key => parent_keys

1158

key_to_source_map a dict from {key: source}

1159

source_results a list of (source: keys)

1160

"""

1161

parent_map = {}

1162

key_to_source_map = {}

1163

source_results = []

1164

for source in self._fallback_vfs:

1165

if not missing:

1166

break

1167

source_parents = source.get_parent_map(missing)

1168

parent_map.update(source_parents)

1169

source_parents = list(source_parents)

1170

source_results.append((source, source_parents))

1171

key_to_source_map.update((key, source) for key in source_parents)

1172

missing.difference_update(source_parents)

1173

return parent_map, key_to_source_map, source_results

1174

1175

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Build the (source, [keys]) list for a graph based ordering.

    The returned objects should be in the order defined by 'ordering',
    which can weave between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: A dict of key => parent keys for every key to
        order.
    :param key_to_source_map: A dict of key => source for the keys held
        by a fallback; any other key is attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering != 'topological':
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        ordered_keys = sort_gc_optimal(parent_map)
    else:
        ordered_keys = topo_sort(parent_map)
    # Now group by source: each run of consecutive keys from one source
    # becomes one entry.
    grouped = []
    previous = None
    for key in ordered_keys:
        owner = key_to_source_map.get(key, self)
        if owner is not previous:
            grouped.append((owner, []))
            previous = owner
        grouped[-1][1].append(key)
    return grouped

1203

1204

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1205

key_to_source_map):

1206

source_keys = []

1207

current_source = None

1208

for key in orig_keys:

1209

if key in locations or key in unadded_keys:

1210

source = self

1211

elif key in key_to_source_map:

1212

source = key_to_source_map[key]

1213

else: # absent

1214

continue

1215

if source is not current_source:

1216

source_keys.append((source, []))

1217

current_source = source

1218

source_keys[-1][1].append(key)

1219

return source_keys

1220

1221

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1222

source_result):

1223

def get_group(key):

1224

# This is the group the bytes are stored in, followed by the

1225

# location in the group

1226

return locations[key][0]

1227

present_keys = sorted(locations.iterkeys(), key=get_group)

1228

# We don't have an ordering for keys in the in-memory object, but

1229

# lets process the in-memory ones first.

1230

present_keys = list(unadded_keys) + present_keys

1231

# Now grab all of the ones from other sources

1232

source_keys = [(self, present_keys)]

1233

source_keys.extend(source_result)

1234

return source_keys

1235

1236

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    Records for keys found nowhere are yielded first, as
    AbsentContentFactory objects; the remaining records follow in the
    order computed for 'ordering'.

    :param keys: The keys to include.
    :param orig_keys: The keys in the order originally requested; only
        used for the 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    # _find_from_fallback() removes from 'missing' every key a fallback
    # knows about, so what is left afterwards is absent everywhere.
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = dict((key, details[2]) for key, details in
                          locations.iteritems())
        for key in unadded_keys:
            parent_map[key] = self._unadded_refs[key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    elif ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    for key in missing:
        yield AbsentContentFactory(key)
    # 'manager' collects the factories for consecutive keys stored in one
    # block; 'last_read_memo' identifies that block.
    manager = None
    last_read_memo = None
    # TODO: This works fairly well at batching up existing groups into a
    #       streamable format, and possibly allowing for taking one big
    #       group and splitting it when it isn't fully utilized.
    #       However, it doesn't allow us to find under-utilized groups and
    #       combine them into a bigger group on the fly.
    #       (Consider the issue with how chk_map inserts texts
    #       one-at-a-time.) This could be done at insert_record_stream()
    #       time, but it probably would decrease the number of
    #       bytes-on-the-wire for fetch.
    for source, keys in source_keys:
        if source is self:
            for key in keys:
                if key in self._unadded_refs:
                    # The text only exists in the active compressor. Emit
                    # anything batched so far first, so records stay in
                    # the computed order.
                    if manager is not None:
                        for factory in manager.get_record_stream():
                            yield factory
                        last_read_memo = manager = None
                    bytes, sha1 = self._compressor.extract(key)
                    parents = self._unadded_refs[key]
                    yield FulltextContentFactory(key, parents, sha1, bytes)
                else:
                    index_memo, _, parents, (method, _) = locations[key]
                    read_memo = index_memo[0:3]
                    if last_read_memo != read_memo:
                        # We are starting a new block. If we have a
                        # manager, we have found everything that fits for
                        # now, so yield records
                        if manager is not None:
                            for factory in manager.get_record_stream():
                                yield factory
                        # Now start a new manager
                        block = self._get_block(index_memo)
                        manager = _LazyGroupContentManager(block)
                        last_read_memo = read_memo
                    start, end = index_memo[3:5]
                    manager.add_factory(key, parents, start, end)
        else:
            # Keys held by a fallback: emit anything batched so far, then
            # delegate to the fallback's own stream.
            if manager is not None:
                for factory in manager.get_record_stream():
                    yield factory
                last_read_memo = manager = None
            for record in source.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                yield record
    # Emit whatever is still batched for the final block.
    if manager is not None:
        for factory in manager.get_record_stream():
            yield factory

1325

1326

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping each present key to its sha1. The sha1
        recorded on the record is used when there is one; otherwise it is
        computed from the record's fulltext. Keys whose record is 'absent'
        are left out.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # Identity test: None is a singleton, and this avoids invoking
        # any custom comparison on the sha1 value.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1337

1338

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    The work is done by _insert_record_stream(); its output is consumed
    and thrown away.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserted = self._insert_record_stream(stream, random_id=False)
    for _ in inserted:
        pass

1351

1352

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to self._index.add_records(). When
        true, a key seen twice in the stream is noted via trace.note()
        and its second occurrence skipped.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records. Records
        inserted by copying a block as-is do not yield a sha1.
    :raises errors.RevisionNotPresent: if the stream contains a record
        whose storage_kind is 'absent'.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    # Adapter objects already built, by (storage_kind, 'fulltext') key.
    adapters = {}
    def get_adapter(adapter_key):
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    # (key, 'start end' string, references) for every text compressed
    # into the current group but not yet written to the index.
    keys_to_add = []
    def flush():
        # Write the current group out as one raw record, index every key
        # it holds, then start over with a fresh compressor.
        bytes = self._compressor.flush().to_bytes()
        index, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    # State for copying whole blocks: the manager of the block most
    # recently written as-is, and where that block landed.
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                insert_manager._check_rebuild_block()
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                # Index this record against the block written above; a
                # '-ref' record relies on an earlier 'groupcompress-block'
                # record having set insert_manager.
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            # Convert through an adapter for this storage kind.
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         type) = self._compressor.compress(record.key,
                                           bytes, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the current group, write that
            # group, and compress the text again as the first entry of a
            # new group.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point,
             type) = self._compressor.compress(record.key, bytes,
                                               record.sha1)
        if record.key[-1] is None:
            # The last key element is derived from the content's sha1.
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
                            (record.parents,)))
    # Write out the final, partially filled group.
    if len(keys_to_add):
        flush()
    self._compressor = None

1490

1491

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Walk the full texts stored under keys, one line at a time.

    Every item produced is a (line, key) tuple, where key is the key of
    the record the line was read from. The line is present in that text
    but was not necessarily introduced by it, and lines from other keys
    may be returned as well.

    Results come back in whatever order suits the underlying storage
    format best.

    NOTES:
     * Lines are normalised by the underlying store: they all carry
       newline terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose texts should be walked.
    :param pb: Optional progress bar used to report progress. Because
        this is an iterator, the caller is responsible for cleaning the
        progress bar up.
    :return: An iterator over (line, key).
    """
    reporter = pb
    if reporter is None:
        reporter = progress.DummyProgress()
    wanted = set(keys)
    wanted_count = len(wanted)
    # Inclusions are the caller's business; all that is needed here is the
    # full text of every requested record.
    # XXX: todo - optimise to use less than full texts.
    stream = self.get_record_stream(wanted, 'unordered', True)
    visited = 0
    for record in stream:
        text_key = record.key
        reporter.update('Walking content', visited, wanted_count)
        visited += 1
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(text_key, self)
        fulltext = record.get_bytes_as('fulltext')
        for line in osutils.split_lines(fulltext):
            yield line, text_key
    reporter.update('Walking content', wanted_count, wanted_count)

1530

1531

def keys(self):
    """See VersionedFiles.keys.

    Collects the keys reported by this object's own index and by every
    fallback versioned-files object into a single set.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    # The local index is consulted first, then each fallback in turn.
    for key_source in [self._index] + self._fallback_vfs:
        all_keys.update(key_source.keys())
    return all_keys

1540

1541

1542

class _GCGraphIndex(object):

1543

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1544

1545

def __init__(self, graph_index, is_locked, parents=True,

1546

add_callback=None):

1547

"""Construct a _GCGraphIndex on a graph_index.

1548

1549

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1550

:param is_locked: A callback, returns True if the index is locked and

1551

thus usable.

1552

:param parents: If True, record knits parents, if not do not record

1553

parents.

1554

:param add_callback: If not None, allow additions to the index and call

1555

this callback with a list of added GraphIndex nodes:

1556

[(node, value, node_refs), ...]

1557

"""

1558

self._add_callback = add_callback

1559

self._graph_index = graph_index

1560

self._parents = parents

1561

self.has_graph = parents

1562

self._is_locked = is_locked

1563

1564

def add_records(self, records, random_id=False):

1565

"""Add multiple records to the index.

1566

1567

This function does not insert data into the Immutable GraphIndex

1568

backing the KnitGraphIndex, instead it prepares data for insertion by

1569

the caller and checks that it is safe to insert then calls

1570

self._add_callback with the prepared GraphIndex nodes.

1571

1572

:param records: a list of tuples:

1573

(key, options, access_memo, parents).

1574

:param random_id: If True the ids being added were randomly generated

1575

and no check for existence will be performed.

1576

"""

1577

if not self._add_callback:

1578

raise errors.ReadOnlyError(self)

1579

# we hope there are no repositories with inconsistent parentage

1580

# anymore.

1581

1582

changed = False

1583

keys = {}

1584

for (key, value, refs) in records:

1585

if not self._parents:

1586

if refs:

1587

for ref in refs:

1588

if ref:

1589

raise KnitCorrupt(self,

1590

"attempt to add node with parents "

1591

"in parentless index.")

1592

refs = ()

1593

changed = True

1594

keys[key] = (value, refs)

1595

# check for dups

1596

if not random_id:

1597

present_nodes = self._get_entries(keys)

1598

for (index, key, value, node_refs) in present_nodes:

1599

if node_refs != keys[key][1]:

1600

raise errors.KnitCorrupt(self, "inconsistent details in add_records"

1601

": %s %s" % ((value, node_refs), keys[key]))

1602

del keys[key]

1603

changed = True

1604

if changed:

1605

result = []

1606

if self._parents:

1607

for key, (value, node_refs) in keys.iteritems():

1608

result.append((key, value, node_refs))

1609

else:

1610

for key, (value, node_refs) in keys.iteritems():

1611

result.append((key, value))

1612

records = result

1613

self._add_callback(records)

1614

1615

def _check_read(self):

1616

"""Raise an exception if reads are not permitted."""

1617

if not self._is_locked():

1618

raise errors.ObjectNotLocked(self)

1619

1620

def _check_write_ok(self):

1621

"""Raise an exception if writes are not permitted."""

1622

if not self._is_locked():

1623

raise errors.ObjectNotLocked(self)

1624

1625

def _get_entries(self, keys, check_present=False):

1626

"""Get the entries for keys.

1627

1628

Note: Callers are responsible for checking that the index is locked

1629

before calling this method.

1630

1631

:param keys: An iterable of index key tuples.

1632

"""

1633

keys = set(keys)

1634

found_keys = set()

1635

if self._parents:

1636

for node in self._graph_index.iter_entries(keys):

1637

yield node

1638

found_keys.add(node[1])

1639

else:

1640

# adapt parentless index to the rest of the code.

1641

for node in self._graph_index.iter_entries(keys):

1642

yield node[0], node[1], node[2], ()

1643

found_keys.add(node[1])

1644

if check_present:

1645

missing_keys = keys.difference(found_keys)

1646

if missing_keys:

1647

raise RevisionNotPresent(missing_keys.pop(), self)

1648

1649

def get_parent_map(self, keys):

1650

"""Get a map of the parents of keys.

1651

1652

:param keys: The keys to look up parents for.

1653

:return: A mapping from keys to parents. Absent keys are absent from

1654

the mapping.

1655

"""

1656

self._check_read()

1657

nodes = self._get_entries(keys)

1658

result = {}

1659

if self._parents:

1660

for node in nodes:

1661

result[node[1]] = node[3][0]

1662

else:

1663

for node in nodes:

1664

result[node[1]] = None

1665

return result

1666

1667

def get_build_details(self, keys):

1668

"""Get the various build details for keys.

1669

1670

Ghosts are omitted from the result.

1671

1672

:param keys: An iterable of keys.

1673

:return: A dict of key:

1674

(index_memo, compression_parent, parents, record_details).

1675

index_memo

1676

opaque structure to pass to read_records to extract the raw

1677

data

1678

compression_parent

1679

Content that this record is built upon, may be None

1680

parents

1681

Logical parents of this node

1682

record_details

1683

extra information about the content which needs to be passed to

1684

Factory.parse_record

1685

"""

1686

self._check_read()

1687

result = {}

1688

entries = self._get_entries(keys)

1689

for entry in entries:

1690

key = entry[1]

1691

if not self._parents:

1692

parents = None

1693

else:

1694

parents = entry[3][0]

1695

method = 'group'

1696

result[key] = (self._node_to_position(entry),

1697

None, parents, (method, None))

1698

return result

1699

1700

def keys(self):

1701

"""Get all the keys in the collection.

1702

1703

The keys are not ordered.

1704

"""

1705

self._check_read()

1706

return [node[1] for node in self._graph_index.iter_all_entries()]

1707

1708

def _node_to_position(self, node):

1709

"""Convert an index value to position details."""

1710

bits = node[2].split(' ')

1711

# It would be nice not to read the entire gzip.

1712

start = int(bits[0])

1713

stop = int(bits[1])

1714

basis_end = int(bits[2])

1715

delta_end = int(bits[3])

1716

return node[0], start, stop, basis_end, delta_end

1717

1718

1719

from bzrlib._groupcompress_py import (

1720

apply_delta,

1721

apply_delta_to_source,

1722

encode_base128_int,

1723

decode_base128_int,

1724

decode_copy_instruction,

1725

LinesDeltaIndex,

1726

)

1727

try:

1728

from bzrlib._groupcompress_pyx import (

1729

apply_delta,

1730

apply_delta_to_source,

1731

DeltaIndex,

1732

encode_base128_int,

1733

decode_base128_int,

1734

)

1735

GroupCompressor = PyrexGroupCompressor

1736

except ImportError:

1737

GroupCompressor = PythonGroupCompressor

1738

Older »