~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Robert Collins
Date: 2009-09-07 03:08:30 UTC
mto: This revision was merged to the branch mainline in revision 4690.
Revision ID: robertc@robertcollins.net-20090907030830-rf59kt28d550eauj

Milestones language tightening, internal consistency.

files added:
COPYING.txt

bzr.ico

bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/cache_utf8.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/counted_lock.py

bzrlib/crash.py

bzrlib/debug.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/inventory_delta.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/revisiontree.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/store/revision

bzrlib/store/versioned

bzrlib/strace.py

bzrlib/switch.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/https_server.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_intertree

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_merge_directive.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_tree

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bencode.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/transport_util.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tuned_gzip.py

bzrlib/urlutils.py

bzrlib/util/_bencode_py.py

bzrlib/util/bencode.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/views.py

bzrlib/weave_commands.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/BUILD-NOTES

doc/Makefile

doc/_static

doc/_static/bzr icon 16.png

doc/_static/bzr.ico

doc/_static/en

doc/_static/en/quick-reference

doc/_static/en/quick-reference/Makefile

doc/_static/en/quick-reference/bzr-quick-reference.pdf

doc/_static/en/quick-reference/bzr-quick-reference.png

doc/_static/en/quick-reference/bzr-quick-reference.svg

doc/_templates

doc/_templates/index.html

doc/_templates/layout.html

doc/bazaar-vcs.org.kid

doc/conf.py

doc/contents.txt

doc/default.css

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/index.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-for-2x.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/developer-guide

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/quick-reference

doc/es/quick-reference/Makefile

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

doc/es/quick-reference/quick-start-summary.svg

doc/es/release-notes

doc/es/user-guide

doc/es/user-guide/index.txt

doc/es/user-guide/resolving_conflicts.txt

doc/es/user-guide/version_info.txt

doc/es/user-reference

doc/index.es.txt

doc/index.ru.txt

doc/index.txt

doc/make.bat

doc/news-template.txt

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

man1

profile_imports.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/time_graph.py

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files removed:
NEWS.developers

build-api

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_reweave.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

notes

files renamed:
bzrlib/graph.py => bzrlib/deprecated_graph.py

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/store/weave.py => bzrlib/store/versioned/__init__.py

bzrlib/tests/test_annotate.py => bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/test_uncommit.py => bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/HTTPTestUtil.py => bzrlib/tests/http_utils.py

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/repository_implementations/ => bzrlib/tests/per_repository/

bzrlib/tests/test_revprops.py => bzrlib/tests/per_repository/test_revision.py

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

bzrlib/tests/test_basis_inventory.py => bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/tests/test_graph.py => bzrlib/tests/test_deprecated_graph.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/tests/test_revisionspec.py

bzrlib/transport/ftp.py => bzrlib/transport/ftp/__init__.py

bzrlib/transport/http.py => bzrlib/transport/http/__init__.py

bzrlib/win32console.py => bzrlib/win32utils.py

bzrlib/xml5.py => bzrlib/xml8.py

HACKING => doc/en/developer-guide/HACKING.txt

tutorial.txt => doc/en/tutorials/tutorial.txt

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

BRANCH.TODO

INSTALL

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj/configobj.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

setup.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

import time

import zlib

try:

import pylzma

except ImportError:

pylzma = None

from bzrlib import (

annotate,

debug,

errors,

graph as _mod_graph,

knit,

osutils,

pack,

trace,

)

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.tsort import topo_sort

from bzrlib.versionedfile import (

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFiles,

)

# Lower bound on how many uncompressed bytes we try to fetch in one go when
# retrieving groupcompress blocks (64kB).
BATCH_SIZE = 1 << 16

# LZMA output is switched off here; the leading False short-circuits the
# expression regardless of whether pylzma could be imported.
_USE_LZMA = False and pylzma is not None

# The value of osutils.sha_string('')
_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Order the keys of parent_map the way groupcompress wants them.

    The keys are first bucketed by prefix: '' for plain string keys and for
    1-element keys, otherwise the first element of the key. Buckets are
    emitted in sorted prefix order, and inside each bucket the keys come out
    in reverse topological order.

    :param parent_map: A dict mapping each key to its parents.
    :return: A list of keys in groupcompress order.
    """
    grouped = {}
    for key, parents in parent_map.iteritems():
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        # One sub-map of key => parents per prefix, created on first use.
        grouped.setdefault(prefix, {})[key] = parents

    ordered_keys = []
    for prefix in sorted(grouped):
        # topo_sort gives parents-first order; groupcompress wants the
        # reverse of that.
        ordered_keys.extend(reversed(topo_sort(grouped[prefix])))
    return ordered_keys

# The max zlib window size is 32kB, so if we set 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should guarantee

# num_bytes coming out.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):
    """An object which maintains the internal structure of the compressed data.

    This tracks the meta info (start of text, length, type, etc.)

    The content can be held in up to three forms at once: compressed
    (_z_content), uncompressed as a single string (_content), or uncompressed
    as a list of chunks (_content_chunks).
    """

    # Group Compress Block v1 Zlib
    GCB_HEADER = 'gcb1z\n'
    # Group Compress Block v1 Lzma
    GCB_LZ_HEADER = 'gcb1l\n'
    GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

    def __init__(self):
        """Create an empty block; every field starts out as None."""
        # map by key? or just order in file?
        self._compressor_name = None
        self._z_content = None
        self._z_content_decompressor = None
        self._z_content_length = None
        self._content_length = None
        self._content = None
        self._content_chunks = None

    def __len__(self):
        """Return the compressed plus uncompressed content lengths."""
        # This is the maximum number of bytes this object will reference if
        # everything is decompressed. However, if we decompress less than
        # everything... (this would cause some problems for LRUSizeCache)
        return self._content_length + self._z_content_length

    def _ensure_content(self, num_bytes=None):
        """Make sure that content has been expanded enough.

        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything
        :raises AssertionError: if num_bytes exceeds the known content length,
            if there is nothing to decompress, if the compressor is unknown,
            or if the decompressor cannot supply the requested bytes.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if num_bytes is None:
            # "Everything" means the recorded content length, which may
            # itself still be None at this point.
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._content_chunks is not None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes is None:
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the decompressor with the uncompressed bytes, so
                    # that the rest of the code is simplified
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
            else:
                raise AssertionError('Unknown compressor: %r'
                                     % self._compressor_name)
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if num_bytes is not None and len(self._content) >= num_bytes:
            return
        if num_bytes is None and self._z_content_decompressor is None:
            # We must have already decompressed everything
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if num_bytes is None:
            if remaining_decomp:
                # We don't know how much is left, but we'll decompress it all
                self._content += self._z_content_decompressor.decompress(
                    remaining_decomp)
                # Note: There's what I consider a bug in zlib.decompressobj
                #       If you pass back in the entire unconsumed_tail, only
                #       this time you don't pass a max-size, it doesn't
                #       change the unconsumed_tail back to None/''.
                #       However, we know we are done with the whole stream
                self._z_content_decompressor = None
            # XXX: Why is this the only place in this routine we set this?
            self._content_length = len(self._content)
        else:
            if not remaining_decomp:
                raise AssertionError('Nothing left to decompress')
            needed_bytes = num_bytes - len(self._content)
            # We always set max_size to 32kB over the minimum needed, so that
            # zlib will give us as much as we really want.
            # TODO: If this isn't good enough, we could make a loop here,
            #       that keeps expanding the request until we get enough
            self._content += self._z_content_decompressor.decompress(
                remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
            if len(self._content) < num_bytes:
                raise AssertionError('%d bytes wanted, only %d available'
                                     % (num_bytes, len(self._content)))
            if not self._z_content_decompressor.unconsumed_tail:
                # The stream is finished
                self._z_content_decompressor = None

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

        This also populates the various 'compressed' buffers.

        :param bytes: The serialised block, including the 6 byte header.
        :param pos: Offset in bytes at which the two length lines start.
        :return: The position in bytes just after the last newline, i.e. the
            offset at which the compressed content starts.
        :raises AssertionError: if the number of bytes following the length
            lines does not match the declared compressed length.
        """
        # At present, we have 2 integers for the compressed and uncompressed
        # content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
        # checking too far, cap the search to 14 bytes.
        pos2 = bytes.index('\n', pos, pos + 14)
        self._z_content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        pos2 = bytes.index('\n', pos, pos + 14)
        self._content_length = int(bytes[pos:pos2])
        pos = pos2 + 1
        if len(bytes) != (pos + self._z_content_length):
            # XXX: Define some GCCorrupt error ?
            raise AssertionError('Invalid bytes: (%d) != %d + %d' %
                                 (len(bytes), pos, self._z_content_length))
        self._z_content = bytes[pos:]
        return pos

    @classmethod
    def from_bytes(cls, bytes):
        """Build a block from its serialised form.

        :param bytes: A string starting with one of GCB_KNOWN_HEADERS,
            followed by the two length lines and the compressed content.
        :return: A new block with the compressor name, both lengths and the
            compressed content filled in. Nothing is decompressed here.
        :raises ValueError: if bytes does not start with a known header.
        """
        out = cls()
        if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
            raise ValueError('bytes did not start with any of %r'
                             % (cls.GCB_KNOWN_HEADERS,))
        # XXX: why not testing the whole header ?
        if bytes[4] == 'z':
            out._compressor_name = 'zlib'
        elif bytes[4] == 'l':
            out._compressor_name = 'lzma'
        else:
            raise ValueError('unknown compressor: %r' % (bytes,))
        out._parse_bytes(bytes, 6)
        return out

    def extract(self, key, start, end, sha1=None):
        """Extract the text for a specific key.

        :param key: The label used for this content
        :param start: Offset of the record's first byte in the uncompressed
            content.
        :param end: Offset just past the record's last byte.
        :param sha1: TODO (should we validate only when sha1 is supplied?)
        :return: The bytes for the content ('' when start == end == 0)
        :raises ValueError: if the record does not start with 'f' or 'd', or
            if the length stored in the record header disagrees with end.
        """
        if start == end == 0:
            return ''
        self._ensure_content(end)
        # The bytes are 'f' or 'd' for the type, then a variable-length
        # base128 integer for the content size, then the actual content
        # We know that the variable-length integer won't be longer than 5
        # bytes (it takes 5 bytes to encode 2^32)
        c = self._content[start]
        if c not in ('f', 'd'):
            raise ValueError('Unknown content control code: %s'
                             % (c,))
        content_len, len_len = decode_base128_int(
            self._content[start + 1:start + 6])
        content_start = start + 1 + len_len
        if end != content_start + content_len:
            raise ValueError('end != len according to field header'
                ' %s != %s' % (end, content_start + content_len))
        if c == 'f':
            # Fulltext: the payload is the text itself.
            return self._content[content_start:end]
        # Delta: the payload is applied against the block content.
        return apply_delta_to_source(self._content, content_start, end)

    def set_chunked_content(self, content_chunks, length):
        """Set the content of this block to the given chunks.

        :param content_chunks: A list of strings making up the content.
        :param length: The value to record as the uncompressed length.
        """
        # If we have lots of short lines, it is may be more efficient to join
        # the content ahead of time. If the content is <10MiB, we don't really
        # care about the extra memory consumption, so we can just pack it and
        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
        # mysql, which is below the noise margin
        self._content_length = length
        self._content_chunks = content_chunks
        # Any previously held joined or compressed form is now stale.
        self._content = None
        self._z_content = None

    def set_content(self, content):
        """Set the content of this block.

        :param content: The complete uncompressed content as one string.
        """
        self._content_length = len(content)
        self._content = content
        # Any previously held compressed form is now stale.
        self._z_content = None

    def _create_z_content_using_lzma(self):
        """Fill in _z_content and _z_content_length using pylzma."""
        if self._content_chunks is not None:
            self._content = ''.join(self._content_chunks)
            self._content_chunks = None
        if self._content is None:
            raise AssertionError('Nothing to compress')
        self._z_content = pylzma.compress(self._content)
        self._z_content_length = len(self._z_content)

    def _create_z_content_from_chunks(self):
        """Fill in _z_content by feeding the chunks through one compressobj."""
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
        # An explicit list, because we append the flush() output below.
        compressed_chunks = [compressor.compress(chunk)
                             for chunk in self._content_chunks]
        compressed_chunks.append(compressor.flush())
        self._z_content = ''.join(compressed_chunks)
        self._z_content_length = len(self._z_content)

    def _create_z_content(self):
        """Make sure _z_content is populated, compressing if necessary."""
        if self._z_content is not None:
            return
        if _USE_LZMA:
            self._create_z_content_using_lzma()
            return
        if self._content_chunks is not None:
            self._create_z_content_from_chunks()
            return
        self._z_content = zlib.compress(self._content)
        self._z_content_length = len(self._z_content)

    def to_bytes(self):
        """Encode the information into a byte stream.

        :return: The header, the compressed and uncompressed lengths (one
            base10 number per line), then the compressed content.
        """
        self._create_z_content()
        if _USE_LZMA:
            header = self.GCB_LZ_HEADER
        else:
            header = self.GCB_HEADER
        chunks = [header,
                  '%d\n%d\n' % (self._z_content_length, self._content_length),
                  self._z_content,
                  ]
        return ''.join(chunks)

    def _dump(self, include_text=False):
        """Take this block, and spit out a human-readable structure.

        :param include_text: If True, also include the text bits in the dump:
            the fulltext of 'f' records, the copied text of 'c' instructions
            and the inserted text of 'i' instructions. If False, 'f' and 'c'
            entries omit the text and 'i' entries carry ''.
        :return: A dump of the given block. The layout is something like:
            [('f', length), ('d', delta_length, text_length, [delta_info])]
            delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
            ...]
        :raises ValueError: if a record has an unknown kind character, a
            length running past the end of the content, or a delta whose
            instructions do not add up.
        """
        self._ensure_content()
        result = []
        pos = 0
        while pos < self._content_length:
            kind = self._content[pos]
            pos += 1
            if kind not in ('f', 'd'):
                raise ValueError('invalid kind character: %r' % (kind,))
            content_len, len_len = decode_base128_int(
                self._content[pos:pos + 5])
            pos += len_len
            if content_len + pos > self._content_length:
                raise ValueError('invalid content_len %d for record @ pos %d'
                                 % (content_len, pos - len_len - 1))
            if kind == 'f': # Fulltext
                if include_text:
                    text = self._content[pos:pos+content_len]
                    result.append(('f', content_len, text))
                else:
                    result.append(('f', content_len))
            elif kind == 'd': # Delta
                delta_content = self._content[pos:pos+content_len]
                delta_info = []
                # The first entry in a delta is the decompressed length
                decomp_len, delta_pos = decode_base128_int(delta_content)
                result.append(('d', content_len, decomp_len, delta_info))
                measured_len = 0
                while delta_pos < content_len:
                    c = ord(delta_content[delta_pos])
                    delta_pos += 1
                    if c & 0x80: # Copy
                        (offset, length,
                         delta_pos) = decode_copy_instruction(delta_content, c,
                                                              delta_pos)
                        if include_text:
                            text = self._content[offset:offset+length]
                            delta_info.append(('c', offset, length, text))
                        else:
                            delta_info.append(('c', offset, length))
                        measured_len += length
                    else: # Insert
                        if include_text:
                            txt = delta_content[delta_pos:delta_pos+c]
                        else:
                            txt = ''
                        delta_info.append(('i', c, txt))
                        measured_len += c
                        delta_pos += c
                if delta_pos != content_len:
                    raise ValueError('Delta consumed a bad number of bytes:'
                                     ' %d != %d' % (delta_pos, content_len))
                if measured_len != decomp_len:
                    raise ValueError('Delta claimed fulltext was %d bytes, but'
                                     ' extraction resulted in %d bytes'
                                     % (decomp_len, measured_len))
            pos += content_len
        return result

404

405

406

class _LazyGroupCompressFactory(object):

407

"""Yield content from a GroupCompressBlock on demand."""

408

409

def __init__(self, key, parents, manager, start, end, first):

410

"""Create a _LazyGroupCompressFactory

411

412

:param key: The key of just this record

413

:param parents: The parents of this key (possibly None)

414

:param gc_block: A GroupCompressBlock object

415

:param start: Offset of the first byte for this record in the

416

uncompressd content

417

:param end: Offset of the byte just after the end of this record

418

(ie, bytes = content[start:end])

419

:param first: Is this the first Factory for the given block?

420

"""

421

self.key = key

422

self.parents = parents

423

self.sha1 = None

424

# Note: This attribute coupled with Manager._factories creates a

425

# reference cycle. Perhaps we would rather use a weakref(), or

426

# find an appropriate time to release the ref. After the first

427

# get_bytes_as call? After Manager.get_record_stream() returns

428

# the object?

429

self._manager = manager

430

self._bytes = None

431

self.storage_kind = 'groupcompress-block'

432

if not first:

433

self.storage_kind = 'groupcompress-block-ref'

434

self._first = first

435

self._start = start

436

self._end = end

437

438

def __repr__(self):

439

return '%s(%s, first=%s)' % (self.__class__.__name__,

440

self.key, self._first)

441

442

def get_bytes_as(self, storage_kind):

443

if storage_kind == self.storage_kind:

444

if self._first:

445

# wire bytes, something...

446

return self._manager._wire_bytes()

447

else:

448

return ''

449

if storage_kind in ('fulltext', 'chunked'):

450

if self._bytes is None:

451

# Grab and cache the raw bytes for this entry

452

# and break the ref-cycle with _manager since we don't need it

453

# anymore

454

self._manager._prepare_for_extract()

455

block = self._manager._block

456

self._bytes = block.extract(self.key, self._start, self._end)

457

# There are code paths that first extract as fulltext, and then

458

# extract as storage_kind (smart fetch). So we don't break the

459

# refcycle here, but instead in manager.get_record_stream()

460

if storage_kind == 'fulltext':

461

return self._bytes

462

else:

463

return [self._bytes]

464

raise errors.UnavailableRepresentation(self.key, storage_kind,

465

self.storage_kind)

466

467

468

class _LazyGroupContentManager(object):

469

"""This manages a group of _LazyGroupCompressFactory objects."""

470

471

_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of

472

# current size, and still be considered

473

# reusable

474

_full_block_size = 4*1024*1024

475

_full_mixed_block_size = 2*1024*1024

476

_full_enough_block_size = 3*1024*1024 # size at which we won't repack

477

_full_enough_mixed_block_size = 2*768*1024 # 1.5MB

478

479

def __init__(self, block):

480

self._block = block

481

# We need to preserve the ordering

482

self._factories = []

483

self._last_byte = 0

484

485

def add_factory(self, key, parents, start, end):
    """Register one more record of the block.

    :param key: The key of the record.
    :param parents: The parents of the record (passed straight through).
    :param start: Offset of the record's first byte in the block content.
    :param end: Offset just past the record's last byte.
    """
    # Only the very first factory added is flagged as 'first'.
    is_first = not self._factories
    # Note that handing 'self' to the factory creates a reference cycle.
    new_factory = _LazyGroupCompressFactory(key, parents, self,
                                            start, end, first=is_first)
    # A plain comparison rather than max(): timeit says 250ms for max() and
    # 100ms for the comparison.
    if end > self._last_byte:
        self._last_byte = end
    self._factories.append(new_factory)

499

500

def get_record_stream(self):

501

"""Get a record for all keys added so far."""

502

for factory in self._factories:

503

yield factory

504

# Break the ref-cycle

505

factory._bytes = None

506

factory._manager = None

507

# TODO: Consider setting self._factories = None after the above loop,

508

# as it will break the reference cycle

509

510

def _trim_block(self, last_byte):
    """Replace the block with one holding only content[:last_byte].

    :param last_byte: Number of leading content bytes to keep.
    """
    # The factories keep their offsets: the retained content sits at exactly
    # the same positions, only the trailing bytes are dropped.
    old_block = self._block
    trace.mutter('stripping trailing bytes from groupcompress block'
                 ' %d => %d', old_block._content_length, last_byte)
    old_block._ensure_content(last_byte)
    trimmed = GroupCompressBlock()
    trimmed.set_content(old_block._content[:last_byte])
    self._block = trimmed

521

522

def _rebuild_block(self):
    """Recompress the texts of our factories into a brand new block.

    Every factory is asked for its fulltext, which is fed to a fresh
    GroupCompressor. The factory's sha1, _start and _end are then updated to
    describe its place in the new block.
    """
    compressor = GroupCompressor()
    started = time.time()
    previous_length = self._block._content_length
    final_end = 0
    for factory in self._factories:
        fulltext = factory.get_bytes_as('fulltext')
        # compress() hands back (sha1, start, end, type); store the first
        # three straight onto the factory. The type is not needed here.
        (factory.sha1, factory._start, factory._end,
         _kind) = compressor.compress(factory.key, fulltext, factory.sha1)
        final_end = factory._end
    self._last_byte = final_end
    rebuilt = compressor.flush()
    # TODO: Should we check that the rebuilt block really *is* smaller than
    #       the old block? It seems hard to come up with a method that it
    #       would expand, since we do full compression again. Perhaps based
    #       on a request that ends up poorly ordered?
    elapsed = time.time() - started
    self._block = rebuilt
    trace.mutter('creating new compressed block on-the-fly in %.3fs'
                 ' %d bytes => %d bytes', elapsed, previous_length,
                 self._block._content_length)

547

548

def _prepare_for_extract(self):

549

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

550

# We expect that if one child is going to fulltext, all will be. This

551

# helps prevent all of them from extracting a small amount at a time.

552

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

553

# time (self._block._content) is a little expensive.

554

self._block._ensure_content(self._last_byte)

555

556

def _check_rebuild_action(self):

557

"""Check to see if our block should be repacked."""

558

total_bytes_used = 0

559

last_byte_used = 0

560

for factory in self._factories:

561

total_bytes_used += factory._end - factory._start

562

if last_byte_used < factory._end:

563

last_byte_used = factory._end

564

# If we are using more than half of the bytes from the block, we have

565

# nothing else to check

566

if total_bytes_used * 2 >= self._block._content_length:

567

return None, last_byte_used, total_bytes_used

568

# We are using less than 50% of the content. Is the content we are

569

# using at the beginning of the block? If so, we can just trim the

570

# tail, rather than rebuilding from scratch.

571

if total_bytes_used * 2 > last_byte_used:

572

return 'trim', last_byte_used, total_bytes_used

573

574

# We are using a small amount of the data, and it isn't just packed

575

# nicely at the front, so rebuild the content.

576

# Note: This would be *nicer* as a strip-data-from-group, rather than

577

# building it up again from scratch

578

# It might be reasonable to consider the fulltext sizes for

579

# different bits when deciding this, too. As you may have a small

580

# fulltext, and a trivial delta, and you are just trading around

581

# for another fulltext. If we do a simple 'prune' you may end up

582

# expanding many deltas into fulltexts, as well.

583

# If we build a cheap enough 'strip', then we could try a strip,

584

# if that expands the content, we then rebuild.

585

return 'rebuild', last_byte_used, total_bytes_used

586

587

def check_is_well_utilized(self):

588

"""Is the current block considered 'well utilized'?

589

590

This heuristic asks if the current block considers itself to be a fully

591

developed group, rather than just a loose collection of data.

592

"""

593

if len(self._factories) == 1:

594

# A block of length 1 could be improved by combining with other

595

# groups - don't look deeper. Even larger than max size groups

596

# could compress well with adjacent versions of the same thing.

597

return False

598

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

599

block_size = self._block._content_length

600

if total_bytes_used < block_size * self._max_cut_fraction:

601

# This block wants to trim itself small enough that we want to

602

# consider it under-utilized.

603

return False

604

# TODO: This code is meant to be the twin of _insert_record_stream's

605

# 'start_new_block' logic. It would probably be better to factor

606

# out that logic into a shared location, so that it stays

607

# together better

608

# We currently assume a block is properly utilized whenever it is >75%

609

# of the size of a 'full' block. In normal operation, a block is

610

# considered full when it hits 4MB of same-file content. So any block

611

# >3MB is 'full enough'.

612

# The only time this isn't true is when a given block has large-object

613

# content. (a single file >4MB, etc.)

614

# Under these circumstances, we allow a block to grow to

615

# 2 x largest_content. Which means that if a given block had a large

616

# object, it may actually be under-utilized. However, given that this

617

# is 'pack-on-the-fly' it is probably reasonable to not repack large

618

# content blobs on-the-fly. Note that because we return False for all

619

# 1-item blobs, we will repack them; we may wish to reevaluate our

620

# treatment of large object blobs in the future.

621

if block_size >= self._full_enough_block_size:

622

return True

623

# If a block is <3MB, it still may be considered 'full' if it contains

624

# mixed content. The current rule is 2MB of mixed content is considered

625

# full. So check to see if this block contains mixed content, and

626

# set the threshold appropriately.

627

common_prefix = None

628

for factory in self._factories:

629

prefix = factory.key[:-1]

630

if common_prefix is None:

631

common_prefix = prefix

632

elif prefix != common_prefix:

633

# Mixed content, check the size appropriately

634

if block_size >= self._full_enough_mixed_block_size:

635

return True

636

break

637

# The content failed both the mixed check and the single-content check

638

# so obviously it is not fully utilized

639

# TODO: there is one other constraint that isn't being checked

640

# namely, that the entries in the block are in the appropriate

641

# order. For example, you could insert the entries in exactly

642

# reverse groupcompress order, and we would think that is ok.

643

# (all the right objects are in one group, and it is fully

644

# utilized, etc.) For now, we assume that case is rare,

645

# especially since we should always fetch in 'groupcompress'

646

# order.

647

return False

648

649

def _check_rebuild_block(self):

650

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

651

if action is None:

652

return

653

if action == 'trim':

654

self._trim_block(last_byte_used)

655

elif action == 'rebuild':

656

self._rebuild_block()

657

else:

658

raise ValueError('unknown rebuild action: %r' % (action,))

659

660

def _wire_bytes(self):
    """Return a byte stream suitable for transmitting over the wire.

    The block is first trimmed/rebuilt if needed (_check_rebuild_block),
    then serialised together with a zlib-compressed header describing each
    factory in self._factories.
    """
    self._check_rebuild_block()
    # Layout of the result:
    #   'groupcompress-block\n'
    #   <length of compressed header>\n
    #   <length of uncompressed header>\n
    #   <length of gc block>\n
    #   <compressed header bytes>
    #   <gc-block>
    #
    # The header holds four lines per record:
    #   1 line key (elements separated by NUL)
    #   1 line with parents (tab separated keys), '' for (), 'None:' for
    #     None
    #   1 line for start offset
    #   1 line for end byte
    # The length and type are encoded in the record itself, but passing
    # the offsets along makes life easier for the receiver.
    records = []
    for factory in self._factories:
        key_field = '\x00'.join(factory.key)
        parent_keys = factory.parents
        if parent_keys is None:
            parents_field = 'None:'
        else:
            parents_field = '\t'.join(
                ['\x00'.join(parent) for parent in parent_keys])
        records.append('%s\n%s\n%d\n%d\n' % (
            key_field, parents_field, factory._start, factory._end))
        # TODO: Can we break the refcycle at this point and set
        #       factory._manager = None?
    header = ''.join(records)
    del records
    header_len = len(header)
    z_header = zlib.compress(header)
    # Drop the uncompressed copy before asking the block for its bytes.
    del header
    gc_bytes = self._block.to_bytes()
    sizes = '%d\n%d\n%d\n' % (len(z_header), header_len, len(gc_bytes))
    return ''.join(['groupcompress-block\n', sizes, z_header, gc_bytes])

705

706

@classmethod
def from_bytes(cls, bytes):
    """Build a manager from a serialised 'groupcompress-block' stream.

    The input is split into the storage kind, three decimal lengths
    (compressed header, uncompressed header, gc block) and the payload.
    The header is decompressed and checked against the declared lengths,
    the block is parsed with GroupCompressBlock.from_bytes(), and one
    factory is added per four header lines (key, parents, start, end).

    :param bytes: The serialised stream.
    :return: An instance of cls wrapping the parsed block.
    :raises ValueError: If the storage kind is wrong or any declared
        length or the header layout is inconsistent with the data.
    """
    # TODO: This does extra string copying, probably better to do it a
    #       different way
    (storage_kind, z_header_size, header_size,
     block_size, payload) = bytes.split('\n', 4)
    del bytes
    if storage_kind != 'groupcompress-block':
        raise ValueError('Unknown storage kind: %s' % (storage_kind,))
    z_header_size = int(z_header_size)
    if len(payload) < z_header_size:
        raise ValueError('Compressed header len shorter than all bytes')
    z_header = payload[:z_header_size]
    header_size = int(header_size)
    header = zlib.decompress(z_header)
    if len(header) != header_size:
        raise ValueError('invalid length for decompressed bytes')
    del z_header
    block_size = int(block_size)
    if len(payload) != z_header_size + block_size:
        raise ValueError('Invalid length for block')
    block_bytes = payload[z_header_size:]
    del payload
    # The envelope is consistent; what remains is to parse the factories
    # that were sent to us.
    fields = header.split('\n')
    del header
    # A well-formed header ends with '\n', so the final split element is
    # empty.
    if fields.pop() != '':
        raise ValueError('header lines did not end with a trailing'
                         ' newline')
    if len(fields) % 4 != 0:
        raise ValueError('The header was not an even multiple of 4 lines')
    block = GroupCompressBlock.from_bytes(block_bytes)
    del block_bytes
    manager = cls(block)
    for base in xrange(0, len(fields), 4):
        # intern()?
        key_line, parents_line, start_line, end_line = fields[base:base + 4]
        key = tuple(key_line.split('\x00'))
        if parents_line == 'None:':
            parents = None
        else:
            parents = tuple([tuple(segment.split('\x00'))
                             for segment in parents_line.split('\t')
                             if segment])
        manager.add_factory(key, parents, int(start_line), int(end_line))
    return manager

756

757

758

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a record stream.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised block, handed to
        _LazyGroupContentManager.from_bytes().
    :param line_end: Not used by this function.
    :return: The result of get_record_stream() on the rebuilt manager.
    :raises ValueError: If storage_kind is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

763

764

765

class _CommonGroupCompressor(object):
    """Bookkeeping shared by the group compressor implementations.

    Subclasses provide _compress() and set self._delta_index.
    """

    def __init__(self):
        """Create a GroupCompressor."""
        self.chunks = []
        self.labels_deltas = {}
        self.endpoint = 0
        self.input_bytes = 0
        self._last = None
        self._delta_index = None # Set by the children
        self._block = GroupCompressBlock()

    def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output
            for identification of the text during decompression. If the last
            element is None it is replaced with 'sha1:' followed by the sha1
            of the text.
        :param bytes: The bytes to be compressed
        :param expected_sha: If non-None, used as the sha1 of the text
            without being recomputed or verified here.
        :param nostore_sha: If the sha1 of the text matches, we will raise
            ExistingContent rather than adding the text.
        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.

        :return: The sha1 of lines, the start and end offsets in the delta, and
            the type ('fulltext' or 'delta').

        :seealso VersionedFiles.add_lines:
        """
        if not bytes:
            # empty, like a dir entry, etc: nothing is stored for it.
            if nostore_sha == _null_sha1:
                raise errors.ExistingContent()
            return _null_sha1, 0, 0, 'fulltext'
        # we assume someone knew what they were doing when they passed it in
        if expected_sha is None:
            sha1 = osutils.sha_string(bytes)
        else:
            sha1 = expected_sha
        if nostore_sha is not None and sha1 == nostore_sha:
            raise errors.ExistingContent()
        if key[-1] is None:
            key = key[:-1] + ('sha1:' + sha1,)

        # A delta is only accepted if it is at most half the input size.
        max_delta_size = len(bytes) / 2
        start, end, kind = self._compress(key, bytes, max_delta_size, soft)
        return sha1, start, end, kind

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """Compress lines with label key.

        Must be provided by subclasses.

        :param key: A key tuple. It is stored in the output for identification
            of the text during decompression.

        :param bytes: The bytes to be compressed

        :param max_delta_size: The size above which we issue a fulltext instead
            of a delta.

        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.

        :return: The start and end offsets in the delta, and the type
            ('fulltext' or 'delta').
        """
        raise NotImplementedError(self._compress)

    def extract(self, key):
        """Extract a key previously added to the compressor.

        :param key: The key to extract.
        :return: A (bytes, sha1) tuple: the reconstructed text and the sha1
            computed from it.
        :raises ValueError: If the stored record is neither a fulltext nor
            a delta, or its encoded length disagrees with the stored bytes.
        """
        (start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]
        stored = ''.join(self.chunks[start_chunk:end_chunk])
        kind = stored[0]
        if kind == 'f':
            fulltext_len, offset = decode_base128_int(stored[1:10])
            # one kind byte + the encoded length + the payload
            expected_len = fulltext_len + 1 + offset
            if expected_len != len(stored):
                raise ValueError('Index claimed fulltext len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored), expected_len))
            text = stored[offset + 1:]
        else:
            # XXX: This is inefficient at best
            source = ''.join(self.chunks[:start_chunk])
            if kind != 'd':
                raise ValueError('Unknown content kind, bytes claim %s'
                                 % (kind,))
            delta_len, offset = decode_base128_int(stored[1:10])
            expected_len = delta_len + 1 + offset
            if expected_len != len(stored):
                raise ValueError('Index claimed delta len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored), expected_len))
            text = apply_delta(source, stored[offset + 1:])
        return text, osutils.sha_string(text)

    def flush(self):
        """Finish this group, creating a formatted stream.

        After calling this, the compressor should no longer be used
        """
        # TODO: this causes us to 'bloat' to 2x the size of content in the
        #       group. This has an impact for 'commit' of large objects.
        #       One possibility is to use self._content_chunks, and be lazy and
        #       only fill out self._content as a full string when we actually
        #       need it. That would at least drop the peak memory consumption
        #       for 'commit' down to ~1x the size of the largest file, at a
        #       cost of increased complexity within this code. 2x is still <<
        #       3x the size of the largest file, so we are doing ok.
        finished = self._block
        finished.set_chunked_content(self.chunks, self.endpoint)
        self.chunks = None
        self._delta_index = None
        return finished

    def pop_last(self):
        """Call this if you want to 'revoke' the last compression.

        After this, the data structures will be rolled back, but you cannot do
        more compression.
        """
        self._delta_index = None
        chunk_count, previous_endpoint = self._last
        del self.chunks[chunk_count:]
        self.endpoint = previous_endpoint
        self._last = None

    def ratio(self):
        """Return the overall compression ratio."""
        consumed = float(self.input_bytes)
        emitted = float(self.endpoint)
        return consumed / emitted

900

901

902

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor backed by a LinesDeltaIndex."""

    def __init__(self):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__()
        index = LinesDeltaIndex([])
        self._delta_index = index
        # The actual content is managed by LinesDeltaIndex
        self.chunks = index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        text_len = len(bytes)
        text_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            text_lines, bytes_length=text_len, soft=soft)
        delta_len = sum([len(line) for line in out_lines])
        if delta_len <= max_delta_size:
            # this is a worthy delta, output it
            kind = 'delta'
            out_lines[0] = 'd'
            # Fill in the encoded delta length in the second slot.
            out_lines[1] = encode_base128_int(delta_len)
        else:
            # The delta exceeds the allowed size, insert a fulltext. The two
            # header entries are not indexed; every text line is.
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(text_len)]
            out_lines.extend(text_lines)
            index_lines = [False, False]
            index_lines.extend([True] * len(text_lines))
        # Remember where we were before insertion, so pop_last can undo it.
        first_byte = self.endpoint
        first_chunk = len(self.chunks)
        self._last = (first_chunk, first_byte)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += text_len
        self.labels_deltas[key] = (first_byte, first_chunk,
                                   self.endpoint, len(self.chunks))
        return first_byte, self.endpoint, kind

945

946

947

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting minimal
       edits only.
     - we chain the left side, not the right side
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """

    def __init__(self):
        """Create a compressor whose delta index is a DeltaIndex."""
        super(PyrexGroupCompressor, self).__init__()
        self._delta_index = DeltaIndex()

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress

        The soft parameter is accepted but not used by this implementation.

        :raises AssertionError: If the delta index's _source_offset and
            self.endpoint disagree before or after the new record is added.
        """
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No acceptable delta was produced: store the text itself.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            # one kind byte plus the encoded length precede the payload
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # The leading space on the second literal matters: adjacent
            # string literals are concatenated without any separator.
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, type

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion position in self._last, appends the
        chunks to self.chunks and advances self.endpoint by their total
        length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

1021

1022

1023

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Passed through to the _GCGraphIndex that the
        factory creates.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles writing to 'newpack' on that transport.
        The open stream and container writer are attached to the result as
        its 'stream' and 'writer' attributes.
    """
    def factory(transport):
        # One reference list (the parents) when a graph is stored, else none.
        if graph:
            ref_lists = 1
        else:
            ref_lists = 0
        builder = BTreeBuilder(reference_lists=ref_lists,
                               key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _GCGraphIndex(builder, lambda:True, parents=graph,
                              add_callback=builder.add_nodes,
                              inconsistency_fatal=inconsistency_fatal)
        access = knit._DirectPackAccess({})
        access.set_writer(writer, builder, (transport, 'newpack'))
        versioned_files = GroupCompressVersionedFiles(index, access, delta)
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

1053

1054

1055

def cleanup_pack_group(versioned_files):
    """Finish the container writer, then close the stream.

    :param versioned_files: An object carrying 'writer' and 'stream'
        attributes, such as the result of a make_pack_factory() factory.
    """
    writer = versioned_files.writer
    writer.end()
    stream = versioned_files.stream
    stream.close()

1058

1059

1060

class _BatchingBlockFetcher(object):
    """Fetch group compress blocks in batches.

    :ivar total_bytes: int of expected number of bytes needed to fetch the
        currently pending batch.
    """

    def __init__(self, gcvf, locations):
        """Create a fetcher.

        :param gcvf: The object providing _group_cache and _get_blocks().
        :param locations: A mapping from key to a 4-tuple whose first item
            is the index memo and whose third item is the parents.
        """
        self.gcvf = gcvf
        self.locations = locations
        # Keys queued since the last yield_factories call, in add order.
        self.keys = []
        # read_memo -> block (or None while the block is still unfetched).
        self.batch_memos = {}
        # read_memos that must be fetched, in the order first requested.
        self.memos_to_get = []
        self.total_bytes = 0
        self.last_read_memo = None
        self.manager = None

    def add_key(self, key):
        """Add another key to fetch.

        :return: The estimated number of bytes needed to fetch the batch so
            far.
        """
        self.keys.append(key)
        index_memo, _, _, _ = self.locations[key]
        read_memo = index_memo[0:3]
        # Three possibilities for this read_memo:
        #  - it's already part of this batch; or
        #  - it's not yet part of this batch, but is already cached; or
        #  - it's not yet part of this batch and will need to be fetched.
        if read_memo in self.batch_memos:
            # This read memo is already in this batch.
            return self.total_bytes
        try:
            cached_block = self.gcvf._group_cache[read_memo]
        except KeyError:
            # This read memo is new to this batch, and the data isn't cached
            # either.
            self.batch_memos[read_memo] = None
            self.memos_to_get.append(read_memo)
            byte_length = read_memo[2]
            self.total_bytes += byte_length
        else:
            # This read memo is new to this batch, but cached.
            # Keep a reference to the cached block in batch_memos because it's
            # certain that we'll use it when this batch is processed, but
            # there's a risk that it would fall out of _group_cache between now
            # and then.
            self.batch_memos[read_memo] = cached_block
        return self.total_bytes

    def _flush_manager(self):
        """Yield the current manager's records, then forget the manager."""
        if self.manager is not None:
            for factory in self.manager.get_record_stream():
                yield factory
            self.manager = None
            self.last_read_memo = None

    def yield_factories(self, full_flush=False):
        """Yield factories for keys added since the last yield.  They will be
        returned in the order they were added via add_key.

        :param full_flush: by default, some results may not be returned in case
            they can be part of the next batch.  If full_flush is True, then
            all results are returned.
        :raises AssertionError: If the blocks come back from _get_blocks in
            a different order than they were requested.
        """
        if self.manager is None and not self.keys:
            return
        # Fetch all memos in this batch.
        blocks = self.gcvf._get_blocks(self.memos_to_get)
        # Turn blocks into factories and yield them.  The stack has the next
        # memo to be delivered by 'blocks' on top.
        memos_to_get_stack = list(self.memos_to_get)
        memos_to_get_stack.reverse()
        for key in self.keys:
            index_memo, _, parents, _ = self.locations[key]
            read_memo = index_memo[:3]
            if self.last_read_memo != read_memo:
                # We are starting a new block. If we have a
                # manager, we have found everything that fits for
                # now, so yield records
                for factory in self._flush_manager():
                    yield factory
                # Now start a new manager.
                if memos_to_get_stack and memos_to_get_stack[-1] == read_memo:
                    # The next block from _get_blocks will be the block we
                    # need.
                    block_read_memo, block = blocks.next()
                    if block_read_memo != read_memo:
                        # Adjacent literals are joined with no separator, so
                        # the space before the parenthesis is spelled out.
                        raise AssertionError(
                            "block_read_memo out of sync with read_memo"
                            " (%r != %r)" % (block_read_memo, read_memo))
                    self.batch_memos[read_memo] = block
                    memos_to_get_stack.pop()
                else:
                    block = self.batch_memos[read_memo]
                self.manager = _LazyGroupContentManager(block)
                self.last_read_memo = read_memo
            start, end = index_memo[3:5]
            self.manager.add_factory(key, parents, start, end)
        if full_flush:
            for factory in self._flush_manager():
                yield factory
        # Reset the batch state; without full_flush the last manager is kept
        # so that later keys from the same block can join it.
        del self.keys[:]
        self.batch_memos.clear()
        del self.memos_to_get[:]
        self.total_bytes = 0

1166

1167

1168

class GroupCompressVersionedFiles(VersionedFiles):

1169

"""A group-compress based VersionedFiles implementation."""

1170

1171

def __init__(self, index, access, delta=True):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    """
    self._index = index
    self._access = access
    self._delta = delta
    self._unadded_refs = {}
    # Parsed blocks are kept in a size-limited cache of 50MB.
    group_cache_limit = 50 * 1024 * 1024
    self._group_cache = LRUSizeCache(max_size=group_cache_limit)
    # Other VersionedFiles consulted for keys that are not found here.
    self._fallback_vfs = []

1184

1185

def add_lines(self, key, parents, lines, parent_texts=None,
    left_matching_blocks=None, nostore_sha=None, random_id=False,
    check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add. None is
        treated as an empty tuple.
    :param lines: A list of lines. Each line must be a bytestring. And all
        of them except the last must be terminated with a newline and
        contain no other newlines. The last line may either contain no
        newlines or a single terminating newline. If the lines list does
        not meet this constraint the add routine may error or may succeed -
        but you will be unable to read the data back accurately. (Checking
        the lines have been split correctly is expensive and extremely
        unlikely to catch bugs so it is not done at runtime unless
        check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. Not used by this implementation.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent. Not used by this
        implementation.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: The text sha1, the number of bytes in the text, and None
        (this implementation has no opaque representation to hand back).
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_size = sum([len(line) for line in lines])
    record = ChunkedContentFactory(key, parents, None, lines)
    inserted = self._insert_record_stream([record], random_id=random_id,
                                          nostore_sha=nostore_sha)
    # Exactly one record went in, so the first sha1 yielded is ours.
    sha1 = list(inserted)[0]
    return sha1, text_size, None

1234

1235

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :param text: The full text; its class must be exactly str.
    :return: The text sha1, the length of the text, and None.
    :raises errors.BzrBadParameterUnicode: If text is not a str.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_size = len(text)
    record = FulltextContentFactory(key, parents, None, text)
    inserted = self._insert_record_stream([record], random_id=random_id,
                                          nostore_sha=nostore_sha)
    # Exactly one record went in, so the first sha1 yielded is ours.
    sha1 = list(inserted)[0]
    return sha1, text_size, None

1252

1253

def add_fallback_versioned_files(self, a_versioned_files):
    """Register another source of texts to consult for absent keys.

    The new source is appended after any fallbacks already registered.

    :param a_versioned_files: A VersionedFiles object.
    """
    fallbacks = self._fallback_vfs
    fallbacks.append(a_versioned_files)

1259

1260

def annotate(self, key):
    """See VersionedFiles.annotate.

    Builds a fresh annotate.Annotator over this object and returns its
    annotate_flat() result for key.
    """
    return annotate.Annotator(self).annotate_flat(key)

1264

1265

def get_annotator(self):
    """Return a new annotate.Annotator bound to this object."""
    annotator = annotate.Annotator(self)
    return annotator

1267

1268

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    With keys given, the unordered record stream for those keys is
    returned for the caller to consume. Without keys, every key from
    self.keys() is streamed and its fulltext extracted here, and None is
    returned.

    :param progress_bar: Not used by this implementation.
    """
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)
    for record in self.get_record_stream(self.keys(), 'unordered', True):
        record.get_bytes_as('fulltext')

1276

1277

def _check_add(self, key, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param key: The key tuple; its last element is the version id.
    :param lines: The lines to verify when check_content is True.
    :param random_id: Not consulted by this check.
    :param check_content: If True, verify that lines are non-unicode and
        correctly split.
    :raises errors.InvalidRevisionId: If the version id contains
        whitespace.
    """
    version_id = key[-1]
    if version_id is not None and osutils.contains_whitespace(version_id):
        raise errors.InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1291

1292

def get_known_graph_ancestry(self, keys):
    """Get a KnownGraph instance with the ancestry of keys.

    The local index is searched first; whatever it reports as missing is
    then looked up in each fallback's index in turn, stopping as soon as
    nothing is missing.
    """
    # Note that this is identical to
    # KnitVersionedFiles.get_known_graph_ancestry, but they don't share
    # ancestry.
    ancestry, still_missing = self._index.find_ancestry(keys)
    for fallback in self._fallback_vfs:
        if not still_missing:
            break
        found, still_missing = fallback._index.find_ancestry(still_missing)
        ancestry.update(found)
    return _mod_graph.KnownGraph(ancestry)

1307

1308

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    # Only the combined map is wanted; the per-source breakdown is dropped.
    parent_map = self._get_parent_map_with_sources(keys)[0]
    return parent_map

1316

1317

def _get_parent_map_with_sources(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A tuple. The first element is a mapping from keys to parents.
        Absent keys are absent from the mapping. The second element is a
        list with the locations each key was found in. The first element
        is the in-this-knit parents, the second the first fallback source,
        and so on. Sources that were never queried (because nothing was
        left to find) have no entry.
    """
    combined = {}
    per_source = []
    unresolved = set(keys)
    for source in [self._index] + self._fallback_vfs:
        if not unresolved:
            break
        found = source.get_parent_map(unresolved)
        per_source.append(found)
        combined.update(found)
        unresolved.difference_update(set(found))
    return combined, per_source

1339

1340

def _get_blocks(self, read_memos):
    """Get GroupCompressBlocks for the given read_memos.

    Blocks already in self._group_cache are reused. The remaining memos
    are requested from self._access.get_raw_records(), each distinct memo
    once, and every block read that way is parsed and added to the cache
    as it is reached.

    :returns: a series of (read_memo, block) pairs, in the order they were
        originally passed.
    """
    known = {}
    to_fetch = []
    queued = set()
    for memo in read_memos:
        try:
            known[memo] = self._group_cache[memo]
        except KeyError:
            # Not cached: fetch it, but don't ask for the same data twice.
            if memo not in queued:
                queued.add(memo)
                to_fetch.append(memo)
    raw_records = self._access.get_raw_records(to_fetch)
    for memo in read_memos:
        try:
            yield memo, known[memo]
        except KeyError:
            # Read the block, and cache it.
            zdata = raw_records.next()
            gc_block = GroupCompressBlock.from_bytes(zdata)
            self._group_cache[memo] = gc_block
            known[memo] = gc_block
            yield memo, gc_block

1376

1377

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or a index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    nothing_missing = frozenset()
    return nothing_missing

1386

1387

def get_record_stream(self, keys, ordering, include_delta_closure):

1388

"""Get a stream of records for keys.

1389

1390

:param keys: The keys to include.

1391

:param ordering: Either 'unordered' or 'topological'. A topologically

1392

sorted stream has compression parents strictly before their

1393

children.

1394

:param include_delta_closure: If True then the closure across any

1395

compression parents will be included (in the opaque data).

1396

:return: An iterator of ContentFactory objects, each of which is only

1397

valid until the iterator is advanced.

1398

"""

1399

# keys might be a generator

1400

orig_keys = list(keys)

1401

keys = set(keys)

1402

if not keys:

1403

return

1404

if (not self._index.has_graph

1405

and ordering in ('topological', 'groupcompress')):

1406

# Cannot topological order when no graph has been stored.

1407

# but we allow 'as-requested' or 'unordered'

1408

ordering = 'unordered'

1409

1410

remaining_keys = keys

1411

while True:

1412

try:

1413

keys = set(remaining_keys)

1414

for content_factory in self._get_remaining_record_stream(keys,

1415

orig_keys, ordering, include_delta_closure):

1416

remaining_keys.discard(content_factory.key)

1417

yield content_factory

1418

return

1419

except errors.RetryWithNewPacks, e:

1420

self._access.reload_or_raise(e)

1421

1422

def _find_from_fallback(self, missing):
    """Find whatever keys you can from the fallbacks.

    Fallbacks are asked in order, each only for the keys still missing,
    and the search stops once nothing is missing.

    :param missing: A set of missing keys. This set will be mutated as keys
        are found from a fallback_vfs
    :return: (parent_map, key_to_source_map, source_results)
        parent_map  the overall key => parent_keys
        key_to_source_map   a dict from {key: source}
        source_results      a list of (source: keys)
    """
    parent_map = {}
    key_to_source_map = {}
    source_results = []
    for fallback in self._fallback_vfs:
        if not missing:
            break
        found = fallback.get_parent_map(missing)
        parent_map.update(found)
        found_keys = list(found)
        source_results.append((fallback, found_keys))
        for key in found_keys:
            key_to_source_map[key] = fallback
        missing.difference_update(found_keys)
    return parent_map, key_to_source_map, source_results

1445

1446

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Sort the keys of parent_map and split them into per-source runs.

    The keys come back in the order defined by 'ordering'; because that
    order can weave between different sources, the same source may
    appear in more than one run.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: dict of key => parent_keys for every present key.
    :param key_to_source_map: dict of key => fallback source; keys not in
        it are attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    if ordering == 'topological':
        sorter = topo_sort
    else:
        # ordering == 'groupcompress'
        # XXX: This only optimizes for the target ordering. We may need
        #      to balance that with the time it takes to extract
        #      ordering, by somehow grouping based on
        #      locations[key][0:3]
        sorter = sort_gc_optimal
    ordered_keys = sorter(parent_map)
    # Start a new run every time the owning source changes.
    runs = []
    previous_source = None
    for key in ordered_keys:
        source = key_to_source_map.get(key, self)
        if source is not previous_source:
            runs.append((source, []))
            previous_source = source
        runs[-1][1].append(key)
    return runs

1474

1475

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,

1476

key_to_source_map):

1477

source_keys = []

1478

current_source = None

1479

for key in orig_keys:

1480

if key in locations or key in unadded_keys:

1481

source = self

1482

elif key in key_to_source_map:

1483

source = key_to_source_map[key]

1484

else: # absent

1485

continue

1486

if source is not current_source:

1487

source_keys.append((source, []))

1488

current_source = source

1489

source_keys[-1][1].append(key)

1490

return source_keys

1491

1492

def _get_io_ordered_source_keys(self, locations, unadded_keys,

1493

source_result):

1494

def get_group(key):

1495

# This is the group the bytes are stored in, followed by the

1496

# location in the group

1497

return locations[key][0]

1498

present_keys = sorted(locations.iterkeys(), key=get_group)

1499

# We don't have an ordering for keys in the in-memory object, but

1500

# lets process the in-memory ones first.

1501

present_keys = list(unadded_keys) + present_keys

1502

# Now grab all of the ones from other sources

1503

source_keys = [(self, present_keys)]

1504

source_keys.extend(source_result)

1505

return source_keys

1506

1507

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include. Must be a set (set methods are
        called on it).
    :param orig_keys: The keys in the order the caller requested them;
        only consulted for the 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    # Keys stored in this object's index, with their build details.
    locations = self._index.get_build_details(keys)
    # Keys present in self._unadded_refs (and so not in the index).
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    # _find_from_fallback removes from 'missing' every key it locates, so
    # afterwards 'missing' only holds keys that were found nowhere.
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        # details[2] is the parents element of the build details.
        parent_map = dict((key, details[2]) for key, details in
            locations.iteritems())
        for key in unadded_keys:
            parent_map[key] = self._unadded_refs[key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    elif ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    # Keys found nowhere are reported before any real content.
    for key in missing:
        yield AbsentContentFactory(key)
    # Batch up as many keys as we can until either:
    #  - we encounter an unadded ref, or
    #  - we run out of keys, or
    #  - the total bytes to retrieve for this batch > BATCH_SIZE
    batcher = _BatchingBlockFetcher(self, locations)
    # NOTE: 'keys' is rebound here and shadows the parameter from now on.
    for source, keys in source_keys:
        if source is self:
            for key in keys:
                if key in self._unadded_refs:
                    # Flush batch, then yield unadded ref from
                    # self._compressor.
                    for factory in batcher.yield_factories(full_flush=True):
                        yield factory
                    bytes, sha1 = self._compressor.extract(key)
                    parents = self._unadded_refs[key]
                    yield FulltextContentFactory(key, parents, sha1, bytes)
                    continue
                if batcher.add_key(key) > BATCH_SIZE:
                    # Ok, this batch is big enough.  Yield some results.
                    for factory in batcher.yield_factories():
                        yield factory
        else:
            # Drain whatever is batched before handing over to another
            # source, so the overall ordering is preserved.
            for factory in batcher.yield_factories(full_flush=True):
                yield factory
            for record in source.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                yield record
    # Emit anything still batched once all sources have been processed.
    for factory in batcher.yield_factories(full_flush=True):
        yield factory

1576

1577

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to look up.
    :return: A dict of key => sha1. When a record carries no sha1 it is
        computed from the record's fulltext; records whose storage_kind is
        'absent' are left out of the result.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # Identity test: None is the "no sha1 recorded" marker.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1588

1589

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    sha1_iter = self._insert_record_stream(stream, random_id=False)
    # The helper is a generator: it only does work while it is consumed,
    # so drain it and throw the yielded sha1s away.
    for _unused_sha1 in sha1_iter:
        pass

1602

1603

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated, and the
    final block is only flushed when it is run to exhaustion.

    :param stream: A stream of records to insert.
    :param random_id: Passed through to self._index.add_records(). When
        True, a key seen twice in the stream is noted and skipped.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records. Records
        copied as part of a reused block do not yield a sha1.
    :raises errors.RevisionNotPresent: if the stream contains a record
        whose storage_kind is 'absent'.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    # Cache of adapter objects, keyed by (storage_kind, 'fulltext').
    adapters = {}
    def get_adapter(adapter_key):
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = GroupCompressor()
    # key => parents for texts compressed but not yet flushed.
    self._unadded_refs = {}
    # (key, 'start_point end_point', (parents,)) entries awaiting flush().
    keys_to_add = []
    def flush():
        # Write the current compressor's block, index every pending key
        # against it, then reset the pending state and the compressor.
        bytes = self._compressor.flush().to_bytes()
        index, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            # Index value: block start, block length, then the text's
            # start and end points inside the block.
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]
        self._compressor = GroupCompressor()

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    reuse_this_block = reuse_blocks
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note('Insert claimed random_id=True,'
                           ' but then inserted %r two times', record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            # We only check on the first record (groupcompress-block) not
            # on all of the (groupcompress-block-ref) entries.
            # The reuse_this_block flag is then kept until the next
            # groupcompress-block record reassigns it.
            if record.storage_kind == 'groupcompress-block':
                # Check to see if we really want to re-use this block
                insert_manager = record._manager
                reuse_this_block = insert_manager.check_is_well_utilized()
        else:
            reuse_this_block = False
        if reuse_this_block:
            # We still want to reuse this block
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                if insert_manager is not record._manager:
                    raise AssertionError('insert_manager does not match'
                        ' the current record, we cannot be positive'
                        ' that the appropriate content was inserted.'
                        )
                # Point the index at the raw block that was copied above.
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        # Not reusing a block: obtain the fulltext and compress it into
        # the current group.
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        # For multi-element keys the first element is the prefix; 'soft'
        # is passed to the compressor when the prefix is unchanged.
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         type) = self._compressor.compress(record.key,
                                           bytes, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take this text back out of the current group, flush the
            # group, and compress the text again into the fresh one.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point,
             type) = self._compressor.compress(record.key, bytes,
                                               record.sha1)
        # A key ending in None gets its last element derived from the sha1.
        if record.key[-1] is None:
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        # NOTE: the sha1 is yielded before the key is queued for flush().
        yield found_sha1
        keys_to_add.append((key, '%d %d' % (start_point, end_point),
                            (record.parents,)))
    if len(keys_to_add):
        flush()
    self._compressor = None

1757

1758

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have
       newline terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose texts should be walked.
    :param pb: Optional progress bar; its update() method is called once
        per record and once more at the end.
    :return: An iterator over (line, key).
    """
    wanted = set(keys)
    total = len(wanted)
    report_progress = pb is not None
    # we don't care about inclusions, the caller cares.
    # but we need to setup a list of records to visit.
    # we need key, position, length
    stream = self.get_record_stream(wanted, 'unordered', True)
    for done, record in enumerate(stream):
        # XXX: todo - optimise to use less than full texts.
        text_key = record.key
        if report_progress:
            pb.update('Walking content', done, total)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(text_key, self)
        fulltext = record.get_bytes_as('fulltext')
        for line in osutils.split_lines(fulltext):
            yield line, text_key
    if report_progress:
        pb.update('Walking content', total, total)

1797

1798

def keys(self):
    """See VersionedFiles.keys.

    :return: A set holding the keys of this object's index together with
        the keys of every fallback.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    # The index comes first, then each fallback in order.
    for key_source in [self._index] + self._fallback_vfs:
        all_keys.update(key_source.keys())
    return all_keys

1807

1808

1809

class _GCGraphIndex(object):

1810

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1811

1812

def __init__(self, graph_index, is_locked, parents=True,

1813

add_callback=None, track_external_parent_refs=False,

1814

inconsistency_fatal=True):

1815

"""Construct a _GCGraphIndex on a graph_index.

1816

1817

:param graph_index: An implementation of bzrlib.index.GraphIndex.

1818

:param is_locked: A callback, returns True if the index is locked and

1819

thus usable.

1820

:param parents: If True, record knits parents, if not do not record

1821

parents.

1822

:param add_callback: If not None, allow additions to the index and call

1823

this callback with a list of added GraphIndex nodes:

1824

[(node, value, node_refs), ...]

1825

:param track_external_parent_refs: As keys are added, keep track of the

1826

keys they reference, so that we can query get_missing_parents(),

1827

etc.

1828

:param inconsistency_fatal: When asked to add records that are already

1829

present, and the details are inconsistent with the existing

1830

record, raise an exception instead of warning (and skipping the

1831

record).

1832

"""

1833

self._add_callback = add_callback

1834

self._graph_index = graph_index

1835

self._parents = parents

1836

self.has_graph = parents

1837

self._is_locked = is_locked

1838

self._inconsistency_fatal = inconsistency_fatal

1839

if track_external_parent_refs:

1840

self._key_dependencies = knit._KeyRefs()

1841

else:

1842

self._key_dependencies = None

1843

1844

def add_records(self, records, random_id=False):

1845

"""Add multiple records to the index.

1846

1847

This function does not insert data into the Immutable GraphIndex

1848

backing the KnitGraphIndex, instead it prepares data for insertion by

1849

the caller and checks that it is safe to insert then calls

1850

self._add_callback with the prepared GraphIndex nodes.

1851

1852

:param records: a list of tuples:

1853

(key, options, access_memo, parents).

1854

:param random_id: If True the ids being added were randomly generated

1855

and no check for existence will be performed.

1856

"""

1857

if not self._add_callback:

1858

raise errors.ReadOnlyError(self)

1859

# we hope there are no repositories with inconsistent parentage

1860

# anymore.

1861

1862

changed = False

1863

keys = {}

1864

for (key, value, refs) in records:

1865

if not self._parents:

1866

if refs:

1867

for ref in refs:

1868

if ref:

1869

raise errors.KnitCorrupt(self,

1870

"attempt to add node with parents "

1871

"in parentless index.")

1872

refs = ()

1873

changed = True

1874

keys[key] = (value, refs)

1875

# check for dups

1876

if not random_id:

1877

present_nodes = self._get_entries(keys)

1878

for (index, key, value, node_refs) in present_nodes:

1879

if node_refs != keys[key][1]:

1880

details = '%s %s %s' % (key, (value, node_refs), keys[key])

1881

if self._inconsistency_fatal:

1882

raise errors.KnitCorrupt(self, "inconsistent details"

1883

" in add_records: %s" %

1884

details)

1885

else:

1886

trace.warning("inconsistent details in skipped"

1887

" record: %s", details)

1888

del keys[key]

1889

changed = True

1890

if changed:

1891

result = []

1892

if self._parents:

1893

for key, (value, node_refs) in keys.iteritems():

1894

result.append((key, value, node_refs))

1895

else:

1896

for key, (value, node_refs) in keys.iteritems():

1897

result.append((key, value))

1898

records = result

1899

key_dependencies = self._key_dependencies

1900

if key_dependencies is not None and self._parents:

1901

for key, value, refs in records:

1902

parents = refs[0]

1903

key_dependencies.add_references(key, parents)

1904

self._add_callback(records)

1905

1906

def _check_read(self):

1907

"""Raise an exception if reads are not permitted."""

1908

if not self._is_locked():

1909

raise errors.ObjectNotLocked(self)

1910

1911

def _check_write_ok(self):

1912

"""Raise an exception if writes are not permitted."""

1913

if not self._is_locked():

1914

raise errors.ObjectNotLocked(self)

1915

1916

def _get_entries(self, keys, check_present=False):

1917

"""Get the entries for keys.

1918

1919

Note: Callers are responsible for checking that the index is locked

1920

before calling this method.

1921

1922

:param keys: An iterable of index key tuples.

1923

"""

1924

keys = set(keys)

1925

found_keys = set()

1926

if self._parents:

1927

for node in self._graph_index.iter_entries(keys):

1928

yield node

1929

found_keys.add(node[1])

1930

else:

1931

# adapt parentless index to the rest of the code.

1932

for node in self._graph_index.iter_entries(keys):

1933

yield node[0], node[1], node[2], ()

1934

found_keys.add(node[1])

1935

if check_present:

1936

missing_keys = keys.difference(found_keys)

1937

if missing_keys:

1938

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1939

1940

def find_ancestry(self, keys):

1941

"""See CombinedGraphIndex.find_ancestry"""

1942

return self._graph_index.find_ancestry(keys, 0)

1943

1944

def get_parent_map(self, keys):

1945

"""Get a map of the parents of keys.

1946

1947

:param keys: The keys to look up parents for.

1948

:return: A mapping from keys to parents. Absent keys are absent from

1949

the mapping.

1950

"""

1951

self._check_read()

1952

nodes = self._get_entries(keys)

1953

result = {}

1954

if self._parents:

1955

for node in nodes:

1956

result[node[1]] = node[3][0]

1957

else:

1958

for node in nodes:

1959

result[node[1]] = None

1960

return result

1961

1962

def get_missing_parents(self):

1963

"""Return the keys of missing parents."""

1964

# Copied from _KnitGraphIndex.get_missing_parents

1965

# We may have false positives, so filter those out.

1966

self._key_dependencies.add_keys(

1967

self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))

1968

return frozenset(self._key_dependencies.get_unsatisfied_refs())

1969

1970

def get_build_details(self, keys):

1971

"""Get the various build details for keys.

1972

1973

Ghosts are omitted from the result.

1974

1975

:param keys: An iterable of keys.

1976

:return: A dict of key:

1977

(index_memo, compression_parent, parents, record_details).

1978

index_memo

1979

opaque structure to pass to read_records to extract the raw

1980

data

1981

compression_parent

1982

Content that this record is built upon, may be None

1983

parents

1984

Logical parents of this node

1985

record_details

1986

extra information about the content which needs to be passed to

1987

Factory.parse_record

1988

"""

1989

self._check_read()

1990

result = {}

1991

entries = self._get_entries(keys)

1992

for entry in entries:

1993

key = entry[1]

1994

if not self._parents:

1995

parents = None

1996

else:

1997

parents = entry[3][0]

1998

method = 'group'

1999

result[key] = (self._node_to_position(entry),

2000

None, parents, (method, None))

2001

return result

2002

2003

def keys(self):

2004

"""Get all the keys in the collection.

2005

2006

The keys are not ordered.

2007

"""

2008

self._check_read()

2009

return [node[1] for node in self._graph_index.iter_all_entries()]

2010

2011

def _node_to_position(self, node):

2012

"""Convert an index value to position details."""

2013

bits = node[2].split(' ')

2014

# It would be nice not to read the entire gzip.

2015

start = int(bits[0])

2016

stop = int(bits[1])

2017

basis_end = int(bits[2])

2018

delta_end = int(bits[3])

2019

return node[0], start, stop, basis_end, delta_end

2020

2021

def scan_unvalidated_index(self, graph_index):

2022

"""Inform this _GCGraphIndex that there is an unvalidated index.

2023

2024

This allows this _GCGraphIndex to keep track of any missing

2025

compression parents we may want to have filled in to make those

2026

indices valid.

2027

2028

:param graph_index: A GraphIndex

2029

"""

2030

if self._key_dependencies is not None:

2031

# Add parent refs from graph_index (and discard parent refs that

2032

# the graph_index has).

2033

add_refs = self._key_dependencies.add_references

2034

for node in graph_index.iter_all_entries():

2035

add_refs(node[1], node[3][0])

2036

2037

2038

2039

# Start with the pure-Python implementations of the delta helpers.
from bzrlib._groupcompress_py import (
    apply_delta,
    apply_delta_to_source,
    encode_base128_int,
    decode_base128_int,
    decode_copy_instruction,
    LinesDeltaIndex,
    )
# If the _groupcompress_pyx module can be imported, its names rebind the
# ones imported above and the Pyrex compressor is selected; otherwise fall
# back to the Python compressor.
try:
    from bzrlib._groupcompress_pyx import (
        apply_delta,
        apply_delta_to_source,
        DeltaIndex,
        encode_base128_int,
        decode_base128_int,
        )
    GroupCompressor = PyrexGroupCompressor
except ImportError:
    GroupCompressor = PythonGroupCompressor

2058

Older »