~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: John Arbash Meinel
Date: 2005-09-15 21:35:53 UTC
mfrom: (907.1.57)
mto: (1393.2.1)
mto: This revision was merged to the branch mainline in revision 1396.
Revision ID: john@arbash-meinel.com-20050915213552-a6c83a5ef1e20897

(broken) Transport work is merged in. Tests do not pass yet.

files added:
build-api

bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/selftest/testremotebranch.py

bzrlib/store/compressed_text.py

bzrlib/upgrade.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

bzr.ico

bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/annotate.py

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/counted_lock.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/email_message.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/sign_my_commits.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store/revision

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_guess_renames.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/nosmart.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave_commands.py

bzrlib/win32utils.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

bzrlib/xml8.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

generate_docs.py

man1

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/capture_tree.py

tools/check-newsbugs.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
doc/developers/HACKING.txt => HACKING

tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

bzrlib/deprecated_graph.py => bzrlib/graph.py

contrib/newinventory.py => bzrlib/newinventory.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/http_utils.py => bzrlib/selftest/HTTPTestUtil.py

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_parent.py => bzrlib/selftest/test_parent.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_diff.py => bzrlib/selftest/testdiff.py

bzrlib/tests/test_fetch.py => bzrlib/selftest/testfetch.py

bzrlib/tests/test_deprecated_graph.py => bzrlib/selftest/testgraph.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_log.py => bzrlib/selftest/testlog.py

bzrlib/tests/test_merge.py => bzrlib/selftest/testmerge.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revision.py => bzrlib/selftest/testrevision.py

bzrlib/tests/test_revisionspec.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/test_store.py => bzrlib/selftest/teststore.py

bzrlib/tests/test_transport.py => bzrlib/selftest/testtransport.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/transport/http/__init__.py => bzrlib/transport/http.py

bzrlib/ui/__init__.py => bzrlib/ui.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/upgrade.py => tools/history2weaves.py

bzrlib/tests/test_weave.py => tools/testweave.py

doc/en/tutorials/tutorial.txt => tutorial.txt

files modified:
.bzrignore

.rsyncexclude

Makefile

NEWS

README

TODO

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/tree.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

contrib/bash/bzr.simple

contrib/pwclient.full

contrib/pwk

contrib/zsh/_bzr

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

from itertools import izip

from cStringIO import StringIO

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

diff,

errors,

graph as _mod_graph,

osutils,

pack,

patiencediff,

trace,

)

from bzrlib.graph import Graph

from bzrlib.knit import _DirectPackAccess

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# LZMA output is switched off: the leading ``False`` short-circuits the
# expression, so this is always False whether or not pylzma was importable.
_USE_LZMA = False and (pylzma is not None)

# SHA-1 hex digest of the empty string, i.e. osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map the way groupcompress wants them.

    Entries are bucketed by key prefix (the first element of a multi-element
    key; plain strings and 1-tuples share the '' bucket).  Buckets are
    visited in sorted prefix order and, inside each bucket, the entries are
    emitted in reverse topological order.

    :param parent_map: A dict mapping key => parent keys.
    :return: A list of keys in groupcompress order.
    """
    buckets = {}
    for entry in parent_map.iteritems():
        key = entry[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        # Each bucket holds the (key, parents) pairs that topo_sort consumes.
        buckets.setdefault(prefix, []).append(entry)

    ordered_keys = []
    for prefix in sorted(buckets):
        newest_first = reversed(topo_sort(buckets[prefix]))
        ordered_keys.extend(newest_first)
    return ordered_keys

# The max zlib window size is 32kB, so if we set 'max_size' output of the
# decompressor to the requested bytes + 32kB, then we should guarantee
# num_bytes coming out.
# This constant is that 32kB of slack; it is added to every partial
# decompression request made in GroupCompressBlock._ensure_content().
_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The serialised form written by to_bytes() and read by from_bytes() is a
    6 byte header (GCB_HEADER or GCB_LZ_HEADER), the compressed length in
    base 10 followed by a newline, the uncompressed length in base 10
    followed by a newline, and then the compressed content itself.
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    GCB_LZ_HEADER = 'gcb1l\n'

    def __init__(self):
        # map by key? or just order in file?
        # 'zlib' or 'lzma' once known (set by from_bytes() / to_bytes())
        self._compressor_name = None
        # The compressed content, or None if it has not been produced yet
        self._z_content = None
        # A zlib decompressobj while the content is only partially expanded
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        # The (possibly partial) uncompressed content
        self._content = None

    def __len__(self):
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            # May still be None afterwards if the total length is unknown
            num_bytes = self._content_length
        if self._content_length is not None:
            assert num_bytes <= self._content_length
        if self._content is None:
            assert self._z_content is not None
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            else:
                # Start a zlib decompressor
                assert self._compressor_name == 'zlib'
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
                    # Any bytes remaining to be decompressed will be in the
                    # decompressors 'unconsumed_tail'
        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        assert self._z_content_decompressor is not None
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                # NOTE(review): the nesting of the next statement was
                # reconstructed from a flattened listing -- confirm it
                # belongs inside this 'if'.
                self._z_content_decompressor = None
            self._content_length = len(self._content)
        else:
            # If we have nothing left to decomp, we ran out of decomp bytes
            assert remaining_decomp
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            assert len(self._content) >= num_bytes
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The complete serialised block.
        :param pos: Offset in bytes of the first length field.
        :return: The position in bytes just after the last newline
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        assert len(bytes) == (pos + self._z_content_length)
        self._z_content = bytes[pos:]
        assert len(self._z_content) == self._z_content_length

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string as produced by to_bytes().
        :return: A new block with the compressed content and both lengths
            populated; nothing is decompressed here.
        :raises ValueError: If bytes does not start with a known header.
        """
        out = cls()
        if bytes[:6] not in (cls.GCB_HEADER, cls.GCB_LZ_HEADER):
            raise ValueError('bytes did not start with %r' % (cls.GCB_HEADER,))
        # The fifth header byte names the compressor
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content (not consulted here)
        :param start: Offset of the record's control byte in the content
        :param end: Offset just past the end of the record
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content
        :raises ValueError: If the control byte is neither 'f' nor 'd', or if
            the length stored in the record disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c != 'f' and c != 'd':
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
                                    self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            bytes = self._content[content_start:end]
        else:
            bytes = apply_delta_to_source(self._content, content_start, end)
        return bytes

    def set_content(self, content):
        """Set the content of this block."""
        self._content_length = len(content)
        self._content = content
        # Any previously compressed form no longer matches
        self._z_content = None

    def to_bytes(self):
        """Encode the information into a byte stream.

        If no compressed form exists yet the content is compressed now (with
        lzma when _USE_LZMA is set, zlib otherwise) and the compressor used
        is recorded.  The header always names the compressor that actually
        produced the compressed content, so a block read with from_bytes()
        is written back with the header it arrived with.

        :return: header + the two lengths + the compressed content
        """
        if self._z_content is None:
            assert self._content is not None
            if _USE_LZMA:
                self._z_content = pylzma.compress(self._content)
                self._compressor_name = 'lzma'
            else:
                self._z_content = zlib.compress(self._content)
                self._compressor_name = 'zlib'
            self._z_content_length = len(self._z_content)
        if self._compressor_name == 'lzma':
            header = self.GCB_LZ_HEADER
        elif self._compressor_name == 'zlib':
            header = self.GCB_HEADER
        elif _USE_LZMA:
            # Compressed bytes were supplied without naming a compressor;
            # fall back to the module-wide setting.
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

281

282

283

class _LazyGroupCompressFactory(object):
    """A record whose bytes are only pulled out of its block when requested."""

    def __init__(self, key, parents, manager, start, end, first):
        """Create a _LazyGroupCompressFactory

        :param key: The key of just this record
        :param parents: The parents of this key (possibly None)
        :param manager: The object owning the block; it must provide
            _wire_bytes(), _prepare_for_extract() and a _block attribute
        :param start: Offset of the first byte for this record in the
            uncompressed content
        :param end: Offset of the byte just after the end of this record
            (ie, bytes = content[start:end])
        :param first: Is this the first Factory for the given block?
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        # Only the first record of a block carries the block itself; every
        # later one is a bare reference.
        if first:
            self.storage_kind = 'groupcompress-block'
        else:
            self.storage_kind = 'groupcompress-block-ref'
        self._first = first
        self._start = start
        self._end = end
        self._bytes = None
        # Note: This attribute coupled with Manager._factories creates a
        #       reference cycle. Perhaps we would rather use a weakref(), or
        #       find an appropriate time to release the ref. After the first
        #       get_bytes_as call? After Manager.get_record_stream() returns
        #       the object?
        self._manager = manager

    def __repr__(self):
        name = self.__class__.__name__
        return '%s(%s, first=%s)' % (name, self.key, self._first)

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This record's own storage_kind, 'fulltext' or
            'chunked'.
        :return: For the record's own kind, the manager's wire bytes when
            this is the first record and '' otherwise.  For 'fulltext' the
            extracted text, for 'chunked' a one-element list holding it.
        :raises errors.UnavailableRepresentation: For any other kind.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                return ''
            # wire bytes, something...
            return self._manager._wire_bytes()
        if storage_kind not in ('fulltext', 'chunked'):
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                   self.storage_kind)
        if self._bytes is None:
            # Extract once and cache the raw text for this entry.
            manager = self._manager
            manager._prepare_for_extract()
            self._bytes = manager._block.extract(self.key, self._start,
                                                 self._end)
            # XXX: It seems the smart fetch extracts inventories and chk
            #      pages as fulltexts to find the next chk pages, but then
            #      passes them down to be inserted as a
            #      groupcompress-block, so dropping the manager reference
            #      here is not safe. Perhaps we could just change the
            #      storage kind to "fulltext" at that point?
            # self._manager = None
        if storage_kind == 'chunked':
            return [self._bytes]
        return self._bytes

347

348

349

class _LazyGroupContentManager(object):
    """Owns one GroupCompressBlock plus the lazy factories that point into it."""

    def __init__(self, block):
        # Insertion order of the factories is significant and is preserved.
        self._factories = []
        self._last_byte = 0
        self._block = block

    def add_factory(self, key, parents, start, end):
        """Register a record occupying content[start:end] of the block."""
        is_first = not self._factories
        # Note that this creates a reference cycle....
        record = _LazyGroupCompressFactory(key, parents, self,
                                           start, end, first=is_first)
        # max() works here, but as a function call, doing a compare seems to
        # be significantly faster, timeit says 250ms for max() and 100ms for
        # the comparison
        if end > self._last_byte:
            self._last_byte = end
        self._factories.append(record)

    def get_record_stream(self):
        """Get a record for all keys added so far."""
        for record in self._factories:
            yield record
            # Once the consumer asks for the next record, drop the text that
            # was cached on the previous one.
            record._bytes = None
            # XXX: clearing record._manager as well is not safe, the smart
            #      fetch code requests the content as both a 'fulltext', and
            #      then later on as a groupcompress-block. The
            #      iter_interesting_nodes code also is still buffering
            #      multiple records and returning them later. So that code
            #      would need to be updated to either re-fetch the original
            #      object, or buffer it somehow.
            # record._manager = None
        # TODO: Consider setting self._factories = None after the above loop,
        #       as it will break the reference cycle

    def _trim_block(self, last_byte):
        """Create a new GroupCompressBlock, with just some of the content."""
        # None of the factories need to be adjusted, because the content is
        # located in an identical place. Just that some of the unreferenced
        # trailing bytes are stripped
        old_block = self._block
        trace.mutter('stripping trailing bytes from groupcompress block'
                     ' %d => %d', old_block._content_length, last_byte)
        trimmed = GroupCompressBlock()
        old_block._ensure_content(last_byte)
        trimmed.set_content(old_block._content[:last_byte])
        self._block = trimmed

    def _rebuild_block(self):
        """Create a new GroupCompressBlock with only the referenced texts."""
        compressor = GroupCompressor()
        started = time.time()
        previous_length = self._block._content_length
        end_point = 0
        for record in self._factories:
            text = record.get_bytes_as('fulltext')
            (new_sha1, start_point, end_point, _kind,
             _length) = compressor.compress(record.key, text, record.sha1)
            # Point the factory at its location in the block being built
            record.sha1 = new_sha1
            record._start = start_point
            record._end = end_point
        # NOTE(review): reconstructed as following the loop (end_point is
        # pre-set to 0 for the no-factories case) -- confirm.
        self._last_byte = end_point
        rebuilt = compressor.flush()
        # TODO: Should we check that new_block really *is* smaller than the
        #       old block? It seems hard to come up with a method that it
        #       would expand, since we do full compression again. Perhaps
        #       based on a request that ends up poorly ordered?
        elapsed = time.time() - started
        self._block = rebuilt
        trace.mutter('creating new compressed block on-the-fly in %.3fs'
                     ' %d bytes => %d bytes', elapsed, previous_length,
                     self._block._content_length)

    def _prepare_for_extract(self):
        """A _LazyGroupCompressFactory is about to extract to fulltext."""
        # We expect that if one child is going to fulltext, all will be. This
        # helps prevent all of them from extracting a small amount at a time.
        # Which in itself isn't terribly expensive, but resizing 2MB 32kB at a
        # time (self._block._content) is a little expensive.
        self._block._ensure_content(self._last_byte)

    def _check_rebuild_block(self):
        """Check to see if our block should be repacked."""
        bytes_used = 0
        highest_end = 0
        for record in self._factories:
            bytes_used += record._end - record._start
            if record._end > highest_end:
                highest_end = record._end
        doubled = bytes_used * 2
        # If we are using most of the bytes from the block, we have nothing
        # else to check (currently more that 1/2)
        if doubled >= self._block._content_length:
            return
        # Can we just strip off the trailing bytes? If we are going to be
        # transmitting more than 50% of the front of the content, go ahead
        if doubled > highest_end:
            self._trim_block(highest_end)
            return
        # We are using a small amount of the data, and it isn't just packed
        # nicely at the front, so rebuild the content.
        # Note: This would be *nicer* as a strip-data-from-group, rather than
        #       building it up again from scratch
        #       It might be reasonable to consider the fulltext sizes for
        #       different bits when deciding this, too. As you may have a
        #       small fulltext, and a trivial delta, and you are just trading
        #       around for another fulltext. If we do a simple 'prune' you
        #       may end up expanding many deltas into fulltexts, as well.
        #       If we build a cheap enough 'strip', then we could try a
        #       strip, if that expands the content, we then rebuild.
        self._rebuild_block()

    def _wire_bytes(self):
        """Return a byte stream suitable for transmitting over the wire."""
        self._check_rebuild_block()
        # The outer block starts with:
        #   'groupcompress-block\n'
        #   <length of compressed key info>\n
        #   <length of uncompressed info>\n
        #   <length of gc block>\n
        #   <header bytes>
        #   <gc-block>
        # The minimal info we need is the key, the start offset, and the
        # parents. The length and type are encoded in the record itself.
        # However, passing in the other bits makes it easier.  Each record
        # contributes four lines:
        #   1 line key
        #   1 line with parents, '' for ()
        #   1 line for start offset
        #   1 line for end byte
        record_headers = []
        for record in self._factories:
            key_bytes = '\x00'.join(record.key)
            if record.parents is None:
                parent_bytes = 'None:'
            else:
                parent_bytes = '\t'.join('\x00'.join(parent)
                                         for parent in record.parents)
            record_headers.append('%s\n%s\n%d\n%d\n' % (
                key_bytes, parent_bytes, record._start, record._end))
        header_bytes = ''.join(record_headers)
        del record_headers
        header_bytes_len = len(header_bytes)
        z_header_bytes = zlib.compress(header_bytes)
        del header_bytes
        block_bytes = self._block.to_bytes()
        lengths = '%d\n%d\n%d\n' % (len(z_header_bytes), header_bytes_len,
                                    len(block_bytes))
        return ''.join(['groupcompress-block\n', lengths,
                        z_header_bytes, block_bytes])

    @classmethod
    def from_bytes(cls, bytes):
        """Rebuild a manager and its factories from _wire_bytes() output.

        :raises ValueError: If the storage kind, any of the three declared
            lengths, or the shape of the record header is wrong.
        """
        # TODO: This does extra string copying, probably better to do it a
        #       different way
        (storage_kind, z_header_len, header_len,
         block_len, payload) = bytes.split('\n', 4)
        del bytes
        if storage_kind != 'groupcompress-block':
            raise ValueError('Unknown storage kind: %s' % (storage_kind,))
        z_header_len = int(z_header_len)
        if len(payload) < z_header_len:
            raise ValueError('Compressed header len shorter than all bytes')
        z_header = payload[:z_header_len]
        header_len = int(header_len)
        header = zlib.decompress(z_header)
        if len(header) != header_len:
            raise ValueError('invalid length for decompressed bytes')
        del z_header
        block_len = int(block_len)
        if len(payload) != z_header_len + block_len:
            raise ValueError('Invalid length for block')
        block_bytes = payload[z_header_len:]
        del payload
        # So now we have a valid GCB, we just need to parse the factories that
        # were sent to us
        header_lines = header.split('\n')
        del header
        # A well-formed header ends in '\n', leaving one empty trailing item
        if header_lines.pop() != '':
            raise ValueError('header lines did not end with a trailing'
                             ' newline')
        if len(header_lines) % 4 != 0:
            raise ValueError('The header was not an even multiple of 4 lines')
        block = GroupCompressBlock.from_bytes(block_bytes)
        del block_bytes
        manager = cls(block)
        for offset in xrange(0, len(header_lines), 4):
            (key_line, parents_line, start_line,
             end_line) = header_lines[offset:offset + 4]
            # intern()?
            key = tuple(key_line.split('\x00'))
            if parents_line == 'None:':
                parents = None
            else:
                parents = tuple(tuple(segment.split('\x00'))
                                for segment in parents_line.split('\t')
                                if segment)
            start_offset = int(start_line)
            end_offset = int(end_line)
            manager.add_factory(key, parents, start_offset, end_offset)
        return manager

559

560

561

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a stream of records.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The wire form of a whole block, as parsed by
        _LazyGroupContentManager.from_bytes.
    :param line_end: Accepted but not consulted here.
    :return: The record iterator of the reconstructed manager.
    :raises ValueError: If storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

566

567

568

class _CommonGroupCompressor(object):

569

570

def __init__(self):

571

"""Create a GroupCompressor."""

572

self.chunks = []

573

self._last = None

574

self.endpoint = 0

575

self.input_bytes = 0

576

self.labels_deltas = {}

577

self._delta_index = None # Set by the children

578

self._block = GroupCompressBlock()

579

580

def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):

581

"""Compress lines with label key.

582

583

:param key: A key tuple. It is stored in the output

584

for identification of the text during decompression. If the last

585

element is 'None' it is replaced with the sha1 of the text -

586

e.g. sha1:xxxxxxx.

587

:param bytes: The bytes to be compressed

588

:param expected_sha: If non-None, the sha the lines are believed to

589

have. During compression the sha is calculated; a mismatch will

590

cause an error.

591

:param nostore_sha: If the computed sha1 sum matches, we will raise

592

ExistingContent rather than adding the text.

593

:param soft: Do a 'soft' compression. This means that we require larger

594

ranges to match to be considered for a copy command.

595

596

:return: The sha1 of lines, the start and end offsets in the delta, the

597

type ('fulltext' or 'delta') and the number of bytes accumulated in

598

the group output so far.

599

600

:seealso VersionedFiles.add_lines:

601

"""

602

if not bytes: # empty, like a dir entry, etc

603

if nostore_sha == _null_sha1:

604

raise errors.ExistingContent()

605

return _null_sha1, 0, 0, 'fulltext', 0

606

# we assume someone knew what they were doing when they passed it in

607

if expected_sha is not None:

608

sha1 = expected_sha

609

else:

610

sha1 = osutils.sha_string(bytes)

611

if nostore_sha is not None:

612

if sha1 == nostore_sha:

613

raise errors.ExistingContent()

614

if key[-1] is None:

615

key = key[:-1] + ('sha1:' + sha1,)

616

617

return self._compress(key, bytes, sha1, len(bytes) / 2, soft)

618

619

def _compress(self, key, bytes, sha1, max_delta_size, soft=False):

620

"""Compress lines with label key.

621

622

:param key: A key tuple. It is stored in the output for identification

623

of the text during decompression.

624

625

:param bytes: The bytes to be compressed

626

627

:param sha1: The sha1 for 'bytes'.

628

629

:param max_delta_size: The size above which we issue a fulltext instead

630

of a delta.

631

632

:param soft: Do a 'soft' compression. This means that we require larger

633

ranges to match to be considered for a copy command.

634

635

:return: The sha1 of lines, the start and end offsets in the delta, the

636

type ('fulltext' or 'delta') and the number of bytes accumulated in

637

the group output so far.

638

"""

639

raise NotImplementedError(self._compress)

640

641

def extract(self, key):

642

"""Extract a key previously added to the compressor.

643

644

:param key: The key to extract.

645

:return: An iterable over bytes and the sha1.

646

"""

647

(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]

648

delta_chunks = self.chunks[start_chunk:end_chunk]

649

stored_bytes = ''.join(delta_chunks)

650

if stored_bytes[0] == 'f':

651

fulltext_len, offset = decode_base128_int(stored_bytes[1:10])

652

data_len = fulltext_len + 1 + offset

653

if data_len != len(stored_bytes):

654

raise ValueError('Index claimed fulltext len, but stored bytes'

655

' claim %s != %s'

656

% (len(stored_bytes), data_len))

657

bytes = stored_bytes[offset + 1:]

658

else:

659

# XXX: This is inefficient at best

660

source = ''.join(self.chunks[:start_chunk])

661

if stored_bytes[0] != 'd':

662

raise ValueError('Unknown content kind, bytes claim %s'

663

% (stored_bytes[0],))

664

delta_len, offset = decode_base128_int(stored_bytes[1:10])

665

data_len = delta_len + 1 + offset

666

if data_len != len(stored_bytes):

667

raise ValueError('Index claimed delta len, but stored bytes'

668

' claim %s != %s'

669

% (len(stored_bytes), data_len))

670

bytes = apply_delta(source, stored_bytes[offset + 1:])

671

bytes_sha1 = osutils.sha_string(bytes)

672

return bytes, bytes_sha1

673

674

def flush(self):

675

"""Finish this group, creating a formatted stream.

676

677

After calling this, the compressor should no longer be used

678

"""

679

content = ''.join(self.chunks)

680

self.chunks = None

681

self._delta_index = None

682

self._block.set_content(content)

683

return self._block

684

685

def pop_last(self):

686

"""Call this if you want to 'revoke' the last compression.

687

688

After this, the data structures will be rolled back, but you cannot do

689

more compression.

690

"""

691

self._delta_index = None

692

del self.chunks[self._last[0]:]

693

self.endpoint = self._last[1]

694

self._last = None

695

696

def ratio(self):
    """Return the overall compression ratio.

    :return: ``self.input_bytes`` divided by ``self.endpoint``, as a float.
    """
    consumed = float(self.input_bytes)
    produced = float(self.endpoint)
    return consumed / produced

699

700

701

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor that delegates delta generation to LinesDeltaIndex."""

    def __init__(self):
        """Create a GroupCompressor backed by an empty LinesDeltaIndex."""
        super(PythonGroupCompressor, self).__init__()
        self._delta_index = LinesDeltaIndex([])
        # The actual content is managed by LinesDeltaIndex
        self.chunks = self._delta_index.lines

    def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        :param key: Key the text is recorded under in ``labels_deltas``.
        :param bytes: The full text to add.
        :param sha1: Returned unchanged as the first element of the result.
        :param max_delta_size: If the delta is longer than this, a fulltext
            record is stored instead.
        :param soft: Passed through to ``LinesDeltaIndex.make_delta``.
        :return: ``(sha1, start, end, kind, out_length)`` where ``kind`` is
            ``'fulltext'`` or ``'delta'`` and ``out_length`` is the size of
            the stored record including its one-byte kind marker and encoded
            length.
        """
        bytes_length = len(bytes)
        new_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(new_lines,
            bytes_length=bytes_length, soft=soft)
        delta_length = sum(map(len, out_lines))
        if delta_length > max_delta_size:
            # The delta is too long to be worthwhile, insert a fulltext
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(bytes_length)]
            out_lines.extend(new_lines)
            index_lines = [False, False]
            index_lines.extend([True] * len(new_lines))
            out_length = len(out_lines[1]) + bytes_length + 1
        else:
            # this is a worthy delta, output it
            kind = 'delta'
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_length)
            # Record size = kind marker + encoded length + delta body.  The
            # encoded length lives in slot 1 (slot 3 is delta content), which
            # also matches how the fulltext branch sizes its header.
            out_length = len(out_lines[1]) + 1 + delta_length
        # Before insertion
        start = self.endpoint
        chunk_start = len(self._delta_index.lines)
        # Remember where this record begins so that pop_last() can roll the
        # chunk list and endpoint back to this point.
        self._last = (chunk_start, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += bytes_length
        chunk_end = len(self._delta_index.lines)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        return sha1, start, self.endpoint, kind, out_length

744

745

746

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting
       minimal edits only.
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        """Create a compressor backed by an empty DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, sha1, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        :param key: Key the text is recorded under in ``labels_deltas``.
        :param bytes: The full text to add.
        :param sha1: Returned unchanged as the first element of the result.
        :param max_delta_size: Passed to ``DeltaIndex.make_delta``; when that
            returns None a fulltext record is stored.
        :param soft: Accepted for interface compatibility; not used here.
        :return: ``(sha1, start, end, kind, length)`` where ``kind`` is
            ``'fulltext'`` or ``'delta'`` and ``length`` is the size of the
            stored record including its mini header.
        :raises AssertionError: if the delta index's source offset does not
            match ``self.endpoint`` before or after the insertion.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            kind = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # One byte for the kind marker plus the encoded length.
            len_mini_header = 1 + len(enc_length)
            length = len(bytes) + len_mini_header
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            kind = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            length = len(delta) + len_mini_header
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return sha1, start, self.endpoint, kind, length

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion position in ``self._last`` (so the
        insertion can be revoked), appends the chunks and advances
        ``self.endpoint`` by their combined length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

822

823

824

def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles whose ``stream`` and ``writer``
        attributes reference the open write stream and container writer.
    """
    def factory(transport):
        def always_locked():
            return True

        # One reference list is kept when graph data is stored, none otherwise.
        ref_length = int(bool(graph))
        builder = BTreeBuilder(reference_lists=ref_length,
                               key_elements=keylength)
        out_stream = transport.open_write_stream('newpack')
        container = pack.ContainerWriter(out_stream.write)
        container.begin()
        gc_index = _GCGraphIndex(builder, always_locked, parents=graph,
                                 add_callback=builder.add_nodes)
        pack_access = _DirectPackAccess({})
        pack_access.set_writer(container, builder, (transport, 'newpack'))
        versioned_files = GroupCompressVersionedFiles(gc_index, pack_access,
                                                      delta)
        # Kept on the result so that the pack can be finished and closed later.
        versioned_files.stream = out_stream
        versioned_files.writer = container
        return versioned_files
    return factory

853

854

855

def cleanup_pack_group(versioned_files):
    """Finish the pack container and close the stream it writes to.

    :param versioned_files: An object carrying the ``writer`` and ``stream``
        attributes attached by the factory from make_pack_factory.
    """
    try:
        versioned_files.writer.end()
    finally:
        # Close the stream even when ending the container fails, so the
        # underlying file handle is not leaked.
        versioned_files.stream.close()

858

859

860

class GroupCompressVersionedFiles(VersionedFiles):

861

"""A group-compress based VersionedFiles implementation."""

862

863

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    self._index, self._access, self._delta = index, access, delta
    # Parents of keys handed to the compressor but not yet indexed.
    self._unadded_refs = {}
    # Cache of group blocks, capped at 50MB.
    cache_limit = 50 * 1024 * 1024
    self._group_cache = LRUSizeCache(max_size=cache_limit)
    self._fallback_vfs = []

876

877

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add.
    :param lines: A list of lines. Each line must be a bytestring. All of
        them except the last must end with a newline and contain no other
        newlines. The last line may either contain no newline or a single
        terminating newline. If the list does not meet this constraint the
        add routine may error or may succeed - but you will be unable to
        read the data back accurately. (Checking that the lines have been
        split correctly is expensive and extremely unlikely to catch bugs,
        so it is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. VERY IMPORTANT: the texts must be those
        returned by add_lines or data corruption can be caused.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent. The format is
        the SequenceMatcher.get_matching_blocks format.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and an opaque
        representation of the inserted version which can be provided
        back to future add_lines calls in the parent_texts dictionary.
        This implementation always returns None for the opaque part.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    # The caller might pass None if there is no graph data, but kndx
    # indexes can't directly store that, so an empty tuple is used instead.
    if parents is None:
        parents = ()
    # double handling for now. Make it work until then.
    text_length = 0
    for line in lines:
        text_length += len(line)
    factory = ChunkedContentFactory(key, parents, None, lines)
    inserted = self._insert_record_stream([factory], random_id=random_id,
                                          nostore_sha=nostore_sha)
    # Exhaust the generator so that the insertion completes; only one record
    # was supplied, so the first sha1 is the one for this text.
    text_sha1 = list(inserted)[0]
    return text_sha1, text_length, None

926

927

def add_fallback_versioned_files(self, a_versioned_files):
    """Register an extra source for texts not present in this object.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

933

934

def annotate(self, key):
    """See VersionedFiles.annotate."""
    graph = Graph(self)
    parent_map = self.get_parent_map([key])
    if not parent_map:
        raise errors.RevisionNotPresent(key, self)
    if parent_map[key] is None:
        # No parent information recorded: annotate the text on its own.
        keys = [key]
        parent_map = {key: ()}
    else:
        # Walk the ancestry breadth first, collecting every present key.
        searcher = graph._make_breadth_first_searcher([key])
        keys = set()
        while True:
            try:
                present, ghosts = searcher.next_with_ghosts()
            except StopIteration:
                break
            keys.update(present)
        parent_map = self.get_parent_map(keys)
    head_cache = _mod_graph.FrozenHeadsCache(graph)
    annotated = {}
    reannotate = annotate.reannotate
    for record in self.get_record_stream(keys, 'topological', True):
        key = record.key
        text_lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        parent_annotations = []
        for parent in parent_map[key]:
            parent_annotations.append(annotated[parent])
        annotated[key] = list(
            reannotate(parent_annotations, text_lines, key, None, head_cache))
    # key now names the last record streamed; its annotation is the result.
    return annotated[key]

963

964

def check(self, progress_bar=None):
    """See VersionedFiles.check().

    Every record is asked for its fulltext; the result is discarded.
    ``progress_bar`` is accepted but not used.
    """
    all_keys = self.keys()
    stream = self.get_record_stream(all_keys, 'unordered', True)
    for record in stream:
        record.get_bytes_as('fulltext')

969

970

def _check_add(self, key, lines, random_id, check_content):

971

"""check that version_id and lines are safe to add."""

972

version_id = key[-1]

973

if version_id is not None:

974

if osutils.contains_whitespace(version_id):

975

raise errors.InvalidRevisionId(version_id, self)

976

self.check_not_reserved_id(version_id)

977

# TODO: If random_id==False and the key is already present, we should

978

# probably check that the existing content is identical to what is

979

# being inserted, and otherwise raise an exception. This would make

980

# the bundle code simpler.

981

if check_content:

982

self._check_lines_not_unicode(lines)

983

self._check_lines_are_lines(lines)

984

985

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    parent_map, _ = self._get_parent_map_with_sources(keys)
    return parent_map

993

994

def _get_parent_map_with_sources(self, keys):

995

"""Get a map of the parents of keys.

996

997

:param keys: The keys to look up parents for.

998

:return: A tuple. The first element is a mapping from keys to parents.

999

Absent keys are absent from the mapping. The second element is a

1000

list with the locations each key was found in. The first element

1001

is the in-this-knit parents, the second the first fallback source,

1002

and so on.

1003

"""

1004

result = {}

1005

sources = [self._index] + self._fallback_vfs

1006

source_results = []

1007

missing = set(keys)

1008

for source in sources:

1009

if not missing:

1010

break

1011

new_result = source.get_parent_map(missing)

1012

source_results.append(new_result)

1013

result.update(new_result)

1014

missing.difference_update(set(new_result))

1015

return result, source_results

1016

1017

def _get_block(self, index_memo):

1018

read_memo = index_memo[0:3]

1019

# get the group:

1020

try:

1021

block = self._group_cache[read_memo]

1022

except KeyError:

1023

# read the group

1024

zdata = self._access.get_raw_records([read_memo]).next()

1025

# decompress - whole thing - this is not a bug, as it

1026

# permits caching. We might want to store the partially

1027

# decompresed group and decompress object, so that recent

1028

# texts are not penalised by big groups.

1029

block = GroupCompressBlock.from_bytes(zdata)

1030

self._group_cache[read_memo] = block

1031

# cheapo debugging:

1032

# print len(zdata), len(plain)

1033

# parse - requires split_lines, better to have byte offsets

1034

# here (but not by much - we only split the region for the

1035

# recipe, and we often want to end up with lines anyway.

1036

return block

1037

1038

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    nothing_missing = frozenset()
    return nothing_missing

1047

1048

def get_record_stream(self, keys, ordering, include_delta_closure):

1049

"""Get a stream of records for keys.

1050

1051

:param keys: The keys to include.

1052

:param ordering: Either 'unordered' or 'topological'. A topologically

1053

sorted stream has compression parents strictly before their

1054

children.

1055

:param include_delta_closure: If True then the closure across any

1056

compression parents will be included (in the opaque data).

1057

:return: An iterator of ContentFactory objects, each of which is only

1058

valid until the iterator is advanced.

1059

"""

1060

# keys might be a generator

1061

orig_keys = list(keys)

1062

keys = set(keys)

1063

if not keys:

1064

return

1065

if (not self._index.has_graph

1066

and ordering in ('topological', 'groupcompress')):

1067

# Cannot topological order when no graph has been stored.

1068

# but we allow 'as-requested' or 'unordered'

1069

ordering = 'unordered'

1070

1071

remaining_keys = keys

1072

while True:

1073

try:

1074

keys = set(remaining_keys)

1075

for content_factory in self._get_remaining_record_stream(keys,

1076

orig_keys, ordering, include_delta_closure):

1077

remaining_keys.discard(content_factory.key)

1078

yield content_factory

1079

return

1080

except errors.RetryWithNewPacks, e:

1081

self._access.reload_or_raise(e)

1082

1083

def _find_from_fallback(self, missing):

1084

"""Find whatever keys you can from the fallbacks.

1085

1086

:param missing: A set of missing keys. This set will be mutated as keys

1087

are found from a fallback_vfs

1088

:return: (parent_map, key_to_source_map, source_results)

1089

parent_map the overall key => parent_keys

1090

key_to_source_map a dict from {key: source}

1091

source_results a list of (source: keys)

1092

"""

1093

parent_map = {}

1094

key_to_source_map = {}

1095

source_results = []

1096

for source in self._fallback_vfs:

1097

if not missing:

1098

break

1099

source_parents = source.get_parent_map(missing)

1100

parent_map.update(source_parents)

1101

source_parents = list(source_parents)

1102

source_results.append((source, source_parents))

1103

key_to_source_map.update((key, source) for key in source_parents)

1104

missing.difference_update(source_parents)

1105

return parent_map, key_to_source_map, source_results

1106

1107

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The returned objects should be in the order defined by 'ordering',
    which can weave between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: Mapping of key => parent keys for every present key.
    :param key_to_source_map: Mapping of key => fallback source; keys not
        listed are attributed to this object.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering == 'topological':
        sorter = topo_sort
    else:
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        sorter = sort_gc_optimal
    ordered_keys = sorter(parent_map)
    # Now group by source: consecutive keys from one source share an entry.
    grouped = []
    previous_source = None
    for key in ordered_keys:
        owner = key_to_source_map.get(key, self)
        if owner is not previous_source:
            grouped.append((owner, []))
            previous_source = owner
        grouped[-1][1].append(key)
    return grouped

1135

1136

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1137

key_to_source_map):

1138

source_keys = []

1139

current_source = None

1140

for key in orig_keys:

1141

if key in locations or key in unadded_keys:

1142

source = self

1143

elif key in key_to_source_map:

1144

source = key_to_source_map[key]

1145

else: # absent

1146

continue

1147

if source is not current_source:

1148

source_keys.append((source, []))

1149

current_source = source

1150

source_keys[-1][1].append(key)

1151

return source_keys

1152

1153

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1154

source_result):

1155

def get_group(key):

1156

# This is the group the bytes are stored in, followed by the

1157

# location in the group

1158

return locations[key][0]

1159

present_keys = sorted(locations.iterkeys(), key=get_group)

1160

# We don't have an ordering for keys in the in-memory object, but

1161

# lets process the in-memory ones first.

1162

present_keys = list(unadded_keys) + present_keys

1163

# Now grab all of the ones from other sources

1164

source_keys = [(self, present_keys)]

1165

source_keys.extend(source_result)

1166

return source_keys

1167

1168

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include.
    :param orig_keys: The requested keys in their original order; only
        consulted for 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    elif ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = {}
        for located_key, details in locations.iteritems():
            parent_map[located_key] = details[2]
        for unadded_key in unadded_keys:
            parent_map[unadded_key] = self._unadded_refs[unadded_key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    # Whatever the fallbacks could not supply is reported as absent.
    for absent_key in missing:
        yield AbsentContentFactory(absent_key)
    manager = None
    last_read_memo = None
    # TODO: This works fairly well at batching up existing groups into a
    #       streamable format, and possibly allowing for taking one big
    #       group and splitting it when it isn't fully utilized.
    #       However, it doesn't allow us to find under-utilized groups and
    #       combine them into a bigger group on the fly.
    #       (Consider the issue with how chk_map inserts texts
    #       one-at-a-time.) This could be done at insert_record_stream()
    #       time, but it probably would decrease the number of
    #       bytes-on-the-wire for fetch.
    for source, group_keys in source_keys:
        if source is not self:
            # Hand over to a fallback source; first emit anything that has
            # been batched up from our own blocks.
            if manager is not None:
                for factory in manager.get_record_stream():
                    yield factory
                last_read_memo = manager = None
            for record in source.get_record_stream(group_keys, ordering,
                                                   include_delta_closure):
                yield record
            continue
        for key in group_keys:
            if key in self._unadded_refs:
                # Still in the in-memory compressor: emit pending block
                # records first, then extract the text directly.
                if manager is not None:
                    for factory in manager.get_record_stream():
                        yield factory
                    last_read_memo = manager = None
                bytes, sha1 = self._compressor.extract(key)
                parents = self._unadded_refs[key]
                yield FulltextContentFactory(key, parents, sha1, bytes)
            else:
                index_memo, _, parents, (method, _) = locations[key]
                read_memo = index_memo[0:3]
                if last_read_memo != read_memo:
                    # We are starting a new block. If we have a
                    # manager, we have found everything that fits for
                    # now, so yield records
                    if manager is not None:
                        for factory in manager.get_record_stream():
                            yield factory
                    # Now start a new manager
                    block = self._get_block(index_memo)
                    manager = _LazyGroupContentManager(block)
                    last_read_memo = read_memo
                start, end = index_memo[3:5]
                manager.add_factory(key, parents, start, end)
    if manager is not None:
        for factory in manager.get_record_stream():
            yield factory

1257

1258

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict mapping each present key to its sha1. Records that
        carry no sha1 have it computed from their fulltext; absent records
        are left out of the result.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1269

1270

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # The internal helper is a generator; it has to be run to exhaustion
    # for the insertion to happen. The sha1s it yields are not needed here.
    for _unused_sha1 in self._insert_record_stream(stream):
        continue

1279

1280

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to the index when adding records.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records. Records
        copied as whole blocks do not produce a sha1.
    :raises errors.RevisionNotPresent: if the stream contains an absent
        record.
    :raises AssertionError: if a block record arrives without the block
        it belongs to having been inserted first.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    adapters = {}

    def get_adapter(adapter_key):
        # Adapters are created lazily and reused for the rest of the stream.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter

    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    self._unadded_refs = {}
    keys_to_add = []

    def flush():
        # Write the current group out, index everything that was put in it
        # and start again with a fresh compressor.
        bytes = self._compressor.flush().to_bytes()
        _, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                insert_manager._check_rebuild_block()
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                # Raised explicitly rather than with 'assert' so that the
                # checks still run under python -O; indexing a record
                # against the wrong block would corrupt the repository.
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                if record._manager is not insert_manager:
                    raise AssertionError('insert_manager does not match'
                        ' the current record, we cannot be positive'
                        ' that the appropriate content was inserted.')
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point, _kind,
         _length) = self._compressor.compress(record.key,
            bytes, record.sha1, soft=soft,
            nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / length
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the full group, write that group
            # and add the text as the first entry of a new one.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point, _kind,
             _length) = self._compressor.compress(record.key,
                bytes, record.sha1)
        if record.key[-1] is None:
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
            (record.parents,)))
    if keys_to_add:
        flush()
    self._compressor = None

1412

1413

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have
       newline terminators.
     * Lines are returned in arbitrary order.

    :return: An iterator over (line, key).
    :raises errors.RevisionNotPresent: if one of the keys is absent.
    """
    if pb is None:
        pb = progress.DummyProgress()
    keys = set(keys)
    total = len(keys)
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    visited = 0
    for record in self.get_record_stream(keys, 'unordered', True):
        # XXX: todo - optimise to use less than full texts.
        key = record.key
        pb.update('Walking content', visited, total)
        visited += 1
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(key, self)
        for line in osutils.split_lines(record.get_bytes_as('fulltext')):
            yield line, key
    pb.update('Walking content', total, total)

1452

1453

def keys(self):
    """See VersionedFiles.keys.

    :return: A set with the keys of this object's index and of every
        fallback source.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    for source in [self._index] + self._fallback_vfs:
        all_keys.update(source.keys())
    return all_keys

1462

1463

1464

class _GCGraphIndex(object):

1465

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1466

1467

def __init__(self, graph_index, is_locked, parents=True,

1468

add_callback=None):

1469

"""Construct a _GCGraphIndex on a graph_index.

1470

1471

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1472

:param is_locked: A callback, returns True if the index is locked and

1473

thus usable.

1474

:param parents: If True, record knits parents, if not do not record

1475

parents.

1476

:param add_callback: If not None, allow additions to the index and call

1477

this callback with a list of added GraphIndex nodes:

1478

[(node, value, node_refs), ...]

1479

"""

1480

self._add_callback = add_callback

1481

self._graph_index = graph_index

1482

self._parents = parents

1483

self.has_graph = parents

1484

self._is_locked = is_locked

1485

1486

def add_records(self, records, random_id=False):

1487

"""Add multiple records to the index.

1488

1489

This function does not insert data into the Immutable GraphIndex

1490

backing the KnitGraphIndex, instead it prepares data for insertion by

1491

the caller and checks that it is safe to insert then calls

1492

self._add_callback with the prepared GraphIndex nodes.

1493

1494

:param records: a list of tuples:

1495

(key, options, access_memo, parents).

1496

:param random_id: If True the ids being added were randomly generated

1497

and no check for existence will be performed.

1498

"""

1499

if not self._add_callback:

1500

raise errors.ReadOnlyError(self)

1501

# we hope there are no repositories with inconsistent parentage

1502

# anymore.

1503

1504

changed = False

1505

keys = {}

1506

for (key, value, refs) in records:

1507

if not self._parents:

1508

if refs:

1509

for ref in refs:

1510

if ref:

1511

raise KnitCorrupt(self,

1512

"attempt to add node with parents "

1513

"in parentless index.")

1514

refs = ()

1515

changed = True

1516

keys[key] = (value, refs)

1517

# check for dups

1518

if not random_id:

1519

present_nodes = self._get_entries(keys)

1520

for (index, key, value, node_refs) in present_nodes:

1521

if node_refs != keys[key][1]:

1522

raise errors.KnitCorrupt(self, "inconsistent details in add_records"

1523

": %s %s" % ((value, node_refs), keys[key]))

1524

del keys[key]

1525

changed = True

1526

if changed:

1527

result = []

1528

if self._parents:

1529

for key, (value, node_refs) in keys.iteritems():

1530

result.append((key, value, node_refs))

1531

else:

1532

for key, (value, node_refs) in keys.iteritems():

1533

result.append((key, value))

1534

records = result

1535

self._add_callback(records)

1536

1537

def _check_read(self):

1538

"""Raise an exception if reads are not permitted."""

1539

if not self._is_locked():

1540

raise errors.ObjectNotLocked(self)

1541

1542

def _check_write_ok(self):

1543

"""Raise an exception if writes are not permitted."""

1544

if not self._is_locked():

1545

raise errors.ObjectNotLocked(self)

1546

1547

def _get_entries(self, keys, check_present=False):

1548

"""Get the entries for keys.

1549

1550

Note: Callers are responsible for checking that the index is locked

1551

before calling this method.

1552

1553

:param keys: An iterable of index key tuples.

1554

"""

1555

keys = set(keys)

1556

found_keys = set()

1557

if self._parents:

1558

for node in self._graph_index.iter_entries(keys):

1559

yield node

1560

found_keys.add(node[1])

1561

else:

1562

# adapt parentless index to the rest of the code.

1563

for node in self._graph_index.iter_entries(keys):

1564

yield node[0], node[1], node[2], ()

1565

found_keys.add(node[1])

1566

if check_present:

1567

missing_keys = keys.difference(found_keys)

1568

if missing_keys:

1569

raise RevisionNotPresent(missing_keys.pop(), self)

1570

1571

def get_parent_map(self, keys):

1572

"""Get a map of the parents of keys.

1573

1574

:param keys: The keys to look up parents for.

1575

:return: A mapping from keys to parents. Absent keys are absent from

1576

the mapping.

1577

"""

1578

self._check_read()

1579

nodes = self._get_entries(keys)

1580

result = {}

1581

if self._parents:

1582

for node in nodes:

1583

result[node[1]] = node[3][0]

1584

else:

1585

for node in nodes:

1586

result[node[1]] = None

1587

return result

1588

1589

def get_build_details(self, keys):

1590

"""Get the various build details for keys.

1591

1592

Ghosts are omitted from the result.

1593

1594

:param keys: An iterable of keys.

1595

:return: A dict of key:

1596

(index_memo, compression_parent, parents, record_details).

1597

index_memo

1598

opaque structure to pass to read_records to extract the raw

1599

data

1600

compression_parent

1601

Content that this record is built upon, may be None

1602

parents

1603

Logical parents of this node

1604

record_details

1605

extra information about the content which needs to be passed to

1606

Factory.parse_record

1607

"""

1608

self._check_read()

1609

result = {}

1610

entries = self._get_entries(keys)

1611

for entry in entries:

1612

key = entry[1]

1613

if not self._parents:

1614

parents = None

1615

else:

1616

parents = entry[3][0]

1617

method = 'group'

1618

result[key] = (self._node_to_position(entry),

1619

None, parents, (method, None))

1620

return result

1621

1622

def keys(self):

1623

"""Get all the keys in the collection.

1624

1625

The keys are not ordered.

1626

"""

1627

self._check_read()

1628

return [node[1] for node in self._graph_index.iter_all_entries()]

1629

1630

def _node_to_position(self, node):

1631

"""Convert an index value to position details."""

1632

bits = node[2].split(' ')

1633

# It would be nice not to read the entire gzip.

1634

start = int(bits[0])

1635

stop = int(bits[1])

1636

basis_end = int(bits[2])

1637

delta_end = int(bits[3])

1638

return node[0], start, stop, basis_end, delta_end

1639

1640

1641

from bzrlib._groupcompress_py import (

1642

apply_delta,

1643

apply_delta_to_source,

1644

encode_base128_int,

1645

decode_base128_int,

1646

LinesDeltaIndex,

1647

)

1648

try:

1649

from bzrlib._groupcompress_pyx import (

1650

apply_delta,

1651

apply_delta_to_source,

1652

DeltaIndex,

1653

encode_base128_int,

1654

decode_base128_int,

1655

)

1656

GroupCompressor = PyrexGroupCompressor

1657

except ImportError:

1658

GroupCompressor = PythonGroupCompressor

1659

Older »