~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Martin Pool
Date: 2005-11-22 04:30:14 UTC
mto: (1508.1.15) (1185.74.1 bzr.dev (Main development branch)) (1505.1.19 bzr-bound-branch) (1526.1.3 run_tests_twice_for_i18n) (1540.1.3 bzr.dev (Main development branch)) (1685.1.1 bzr-encoding) (1534.1.1 integration) (1553.5.1 bzr.dev (Main development branch)) (1608.2.1 bzr.mbp.escape-stores)
mto: This revision was merged to the branch mainline in revision 1510.
Revision ID: mbp@sourcefrog.net-20051122043014-6df209fd93033b59

Exclude more files from dumb-rsync upload

files added:
NEWS.developers

build-api

bzrlib/clone.py

bzrlib/graph.py

bzrlib/merge_core.py

bzrlib/selftest/testgraph.py

bzrlib/selftest/testreweave.py

bzrlib/textui.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

contrib/add-bzr-to-baz

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/zsh/_bzr

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/trace-revisions

tools/weavebench.py

files removed:
.testr.conf

BRANCH.TODO

COPYING.txt

MANIFEST.in

NEWS

README_BDIST_RPM

apport

apport/README

apport/bzr-crashdb.conf

apport/source_bzr.py

bzr.ico

bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_bencode_pyx.h

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_export_c_api.h

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_import_c_api.h

bzrlib/_knit_load_data_py.py

bzrlib/_knit_load_data_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_rio_py.py

bzrlib/_rio_pyx.pyx

bzrlib/_simple_set_pyx.pxd

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/_static_tuple_py.py

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/bencode.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzr_distutils.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/cethread.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/cleanup.py

bzrlib/cmd_test_script.py

bzrlib/cmd_version_info.py

bzrlib/cmdline.py

bzrlib/commit_signature_commands.py

bzrlib/controldir.py

bzrlib/counted_lock.py

bzrlib/crash.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/doc_generate

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/doc_generate/builders

bzrlib/doc_generate/builders/__init__.py

bzrlib/doc_generate/builders/texinfo.py

bzrlib/doc_generate/conf.py

bzrlib/doc_generate/writers

bzrlib/doc_generate/writers/__init__.py

bzrlib/doc_generate/writers/texinfo.py

bzrlib/email_message.py

bzrlib/estimate_compressed_size.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/export_pot.py

bzrlib/fifo_cache.py

bzrlib/filter_tree.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflict-types.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/help_topics/en/url-special-chars.txt

bzrlib/help_topics/es

bzrlib/help_topics/es/conflict-types.txt

bzrlib/hooks.py

bzrlib/i18n.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/inventory_delta.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/library_state.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/mergetools.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/bash_completion

bzrlib/plugins/bash_completion/README.txt

bzrlib/plugins/bash_completion/__init__.py

bzrlib/plugins/bash_completion/bashcomp.py

bzrlib/plugins/bash_completion/tests

bzrlib/plugins/bash_completion/tests/__init__.py

bzrlib/plugins/bash_completion/tests/test_bashcomp.py

bzrlib/plugins/changelog_merge

bzrlib/plugins/changelog_merge/__init__.py

bzrlib/plugins/changelog_merge/changelog_merge.py

bzrlib/plugins/changelog_merge/tests

bzrlib/plugins/changelog_merge/tests/__init__.py

bzrlib/plugins/changelog_merge/tests/test_changelog_merge.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_api.py

bzrlib/plugins/launchpad/lp_api_lite.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_api.py

bzrlib/plugins/launchpad/test_lp_api_lite.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/plugins/news_merge

bzrlib/plugins/news_merge/README

bzrlib/plugins/news_merge/__init__.py

bzrlib/plugins/news_merge/news_merge.py

bzrlib/plugins/news_merge/parser.py

bzrlib/plugins/news_merge/tests

bzrlib/plugins/news_merge/tests/__init__.py

bzrlib/plugins/news_merge/tests/test_news_merge.py

bzrlib/plugins/po_merge

bzrlib/plugins/po_merge/README

bzrlib/plugins/po_merge/__init__.py

bzrlib/plugins/po_merge/po_merge.py

bzrlib/plugins/po_merge/tests

bzrlib/plugins/po_merge/tests/__init__.py

bzrlib/plugins/po_merge/tests/test_po_merge.py

bzrlib/plugins/weave_fmt

bzrlib/plugins/weave_fmt/__init__.py

bzrlib/plugins/weave_fmt/branch.py

bzrlib/plugins/weave_fmt/bzrdir.py

bzrlib/plugins/weave_fmt/repository.py

bzrlib/plugins/weave_fmt/test_bzrdir.py

bzrlib/plugins/weave_fmt/test_repository.py

bzrlib/plugins/weave_fmt/test_workingtree.py

bzrlib/plugins/weave_fmt/tests

bzrlib/plugins/weave_fmt/workingtree.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/pyutils.py

bzrlib/readdir.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/recordcounter.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitpack_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/rules.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/signals.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/static_tuple.py

bzrlib/store/revision

bzrlib/store/versioned

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_config.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_deleted.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_export_pot.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_lookup_revision.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mkdir.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_remember_option.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_repair_workingtree.py

bzrlib/tests/blackbox/test_resolve.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/blackbox/test_script.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shell_complete.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_revert.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/doc_generate

bzrlib/tests/doc_generate/__init__.py

bzrlib/tests/doc_generate/builders

bzrlib/tests/doc_generate/builders/__init__.py

bzrlib/tests/doc_generate/builders/test_texinfo.py

bzrlib/tests/doc_generate/writers

bzrlib/tests/doc_generate/writers/__init__.py

bzrlib/tests/doc_generate/writers/test_texinfo.py

bzrlib/tests/fake_command.py

bzrlib/tests/features.py

bzrlib/tests/file_utils.py

bzrlib/tests/fixtures.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/https_server.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/matchers.py

bzrlib/tests/per_branch

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_config.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_hooks.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_last_revision_info.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_controldir

bzrlib/tests/per_controldir/__init__.py

bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/per_controldir/test_format.py

bzrlib/tests/per_controldir/test_push.py

bzrlib/tests/per_controldir_colo

bzrlib/tests/per_controldir_colo/__init__.py

bzrlib/tests/per_controldir_colo/test_supported.py

bzrlib/tests/per_controldir_colo/test_unsupported.py

bzrlib/tests/per_foreign_vcs

bzrlib/tests/per_foreign_vcs/__init__.py

bzrlib/tests/per_foreign_vcs/test_branch.py

bzrlib/tests/per_foreign_vcs/test_repository.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_copy_content_into.py

bzrlib/tests/per_interbranch/test_fetch.py

bzrlib/tests/per_interbranch/test_get.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_intertree/test_file_content_matches.py

bzrlib/tests/per_inventory

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/per_merger.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_file_graph.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_iter_reverse_revision_history.py

bzrlib/tests/per_repository/test_locking.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_signatures.py

bzrlib/tests/per_repository/test_statistics.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test__make_parents_provider.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_commit_with_stacking.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/per_repository_reference/test_fetch.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_repository_reference/test_graph.py

bzrlib/tests/per_repository_reference/test_initialize.py

bzrlib/tests/per_repository_reference/test_unlock.py

bzrlib/tests/per_repository_vf

bzrlib/tests/per_repository_vf/__init__.py

bzrlib/tests/per_repository_vf/helpers.py

bzrlib/tests/per_repository_vf/test__generate_text_key_index.py

bzrlib/tests/per_repository_vf/test_add_inventory_by_delta.py

bzrlib/tests/per_repository_vf/test_check.py

bzrlib/tests/per_repository_vf/test_check_reconcile.py

bzrlib/tests/per_repository_vf/test_fetch.py

bzrlib/tests/per_repository_vf/test_fileid_involved.py

bzrlib/tests/per_repository_vf/test_find_text_key_references.py

bzrlib/tests/per_repository_vf/test_merge_directive.py

bzrlib/tests/per_repository_vf/test_reconcile.py

bzrlib/tests/per_repository_vf/test_refresh_data.py

bzrlib/tests/per_repository_vf/test_repository.py

bzrlib/tests/per_repository_vf/test_write_group.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_export.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_is_executable.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_locking.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_uifactory

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_check.py

bzrlib/tests/per_workingtree/test_check_state.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_symlinks.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/scenarios.py

bzrlib/tests/script.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__annotator.py

bzrlib/tests/test__bencode.py

bzrlib/tests/test__btree_serializer.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test__rio.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test__static_tuple.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_cethread.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chk_serializer.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_cmdline.py

bzrlib/tests/test_controldir.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_estimate_compressed_size.py

bzrlib/tests/test_export.py

bzrlib/tests/test_export_pot.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_features.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filter_tree.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_fixtures.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_https_urllib.py

bzrlib/tests/test_i18n.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_library_state.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_matchers.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_mergetools.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/binary-after-normal.patch

bzrlib/tests/test_patches_data/binary.patch

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_pyutils.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_scenarios.py

bzrlib/tests/test_script.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_server.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_signals.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_test_server.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_treeshape.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_url_policy_open.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_utextwrap.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_vf_search.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/testui.py

bzrlib/tests/transport_util.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/timestamp.py

bzrlib/transform.py

bzrlib/transport/brokenrename.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/gio_transport.py

bzrlib/transport/http

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/pathfilter.py

bzrlib/transport/readonly.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/treebuilder.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/url_policy_open.py

bzrlib/urlutils.py

bzrlib/utextwrap.py

bzrlib/util/_bencode_py.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_custom.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/vf_repository.py

bzrlib/vf_search.py

bzrlib/views.py

bzrlib/workingtree_3.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml7.py

contrib/bash/bzr

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

contrib/debian

contrib/debian/default

contrib/debian/init.d

contrib/zsh/README

doc/Bazaar-Logo-For-Manuals.png

doc/default.css

doc/developers

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr-doc.css

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/apport.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bug-handling.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/check.txt

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/conf.py

doc/developers/configuration.txt

doc/developers/container-format.txt

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/documenting-changes.txt

doc/developers/ec2.txt

doc/developers/feature-flags.txt

doc/developers/fetch.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/implementation-notes.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/miscellaneous-notes.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/new-config-rationale.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plans.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/principles.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/specifications.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/transports.txt

doc/developers/ui.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/developers/win32_build_setup.txt

doc/developers/xdg_config_spec.txt

doc/en

doc/en/Makefile

doc/en/_static

doc/en/_static/bzr icon 16.png

doc/en/_static/bzr.ico

doc/en/_static/en

doc/en/_static/en/Makefile

doc/en/_static/en/bzr-en-quick-reference.pdf

doc/en/_static/en/bzr-en-quick-reference.png

doc/en/_static/en/bzr-en-quick-reference.svg

doc/en/_templates

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/admin-guide

doc/en/admin-guide/advanced.txt

doc/en/admin-guide/backup.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/index-plain.txt

doc/en/admin-guide/index.txt

doc/en/admin-guide/integration.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/licence.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/security.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/make.bat

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/index.txt

doc/en/release-notes

doc/en/release-notes/bzr-0.1.txt

doc/en/release-notes/bzr-0.10.txt

doc/en/release-notes/bzr-0.11.txt

doc/en/release-notes/bzr-0.12.txt

doc/en/release-notes/bzr-0.13.txt

doc/en/release-notes/bzr-0.14.txt

doc/en/release-notes/bzr-0.15.txt

doc/en/release-notes/bzr-0.16.txt

doc/en/release-notes/bzr-0.17.txt

doc/en/release-notes/bzr-0.18.txt

doc/en/release-notes/bzr-0.6.txt

doc/en/release-notes/bzr-0.7.txt

doc/en/release-notes/bzr-0.8.txt

doc/en/release-notes/bzr-0.9.txt

doc/en/release-notes/bzr-0.90.txt

doc/en/release-notes/bzr-0.91.txt

doc/en/release-notes/bzr-0.92.txt

doc/en/release-notes/bzr-1.0.txt

doc/en/release-notes/bzr-1.1.txt

doc/en/release-notes/bzr-1.10.txt

doc/en/release-notes/bzr-1.11.txt

doc/en/release-notes/bzr-1.12.txt

doc/en/release-notes/bzr-1.13.txt

doc/en/release-notes/bzr-1.14.txt

doc/en/release-notes/bzr-1.15.txt

doc/en/release-notes/bzr-1.16.txt

doc/en/release-notes/bzr-1.17.txt

doc/en/release-notes/bzr-1.18.txt

doc/en/release-notes/bzr-1.2.txt

doc/en/release-notes/bzr-1.3.txt

doc/en/release-notes/bzr-1.4.txt

doc/en/release-notes/bzr-1.5.txt

doc/en/release-notes/bzr-1.6.txt

doc/en/release-notes/bzr-1.7.txt

doc/en/release-notes/bzr-1.8.txt

doc/en/release-notes/bzr-1.9.txt

doc/en/release-notes/bzr-2.0.txt

doc/en/release-notes/bzr-2.1.txt

doc/en/release-notes/bzr-2.2.txt

doc/en/release-notes/bzr-2.4.txt

doc/en/release-notes/bzr-2.5.txt

doc/en/release-notes/bzr-2.6.txt

doc/en/release-notes/release-template.txt

doc/en/release-notes/series-template.txt

doc/en/tutorials

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/index.txt

doc/en/tutorials/licence.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/licence.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/gpg_signatures.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/index-plain.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/licence.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/en/whats-new

doc/en/whats-new/template.txt

doc/en/whats-new/whats-new-in-2.1.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/en/whats-new/whats-new-in-2.3.txt

doc/en/whats-new/whats-new-in-2.4.txt

doc/en/whats-new/whats-new-in-2.5.txt

doc/en/whats-new/whats-new-in-2.6.txt

doc/es

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_static/es/Makefile

doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/_static/es/bzr-es-quick-reference.png

doc/es/_static/es/bzr-es-quick-reference.svg

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/index.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/quick-reference

doc/es/quick-reference/index.txt

doc/es/user-guide

doc/es/user-guide/index-plain.txt

doc/es/user-guide/index.txt

doc/es/user-guide/version_info.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/index.txt

doc/ja

doc/ja/_static

doc/ja/_static/bzr icon 16.png

doc/ja/_static/bzr.ico

doc/ja/_templates

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/index.txt

doc/ja/tutorials/licence.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide

doc/ja/upgrade-guide/data_migration.txt

doc/ja/upgrade-guide/index.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/upgrade-guide/tips_and_tricks.txt

doc/ja/user-guide

doc/ja/user-guide/adv_merging.txt

doc/ja/user-guide/annotating_changes.txt

doc/ja/user-guide/bazaar_workflows.txt

doc/ja/user-guide/branching_a_project.txt

doc/ja/user-guide/browsing_history.txt

doc/ja/user-guide/bug_trackers.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/central_intro.txt

doc/ja/user-guide/configuring_bazaar.txt

doc/ja/user-guide/controlling_registration.txt

doc/ja/user-guide/core_concepts.txt

doc/ja/user-guide/distributed_intro.txt

doc/ja/user-guide/entering_commands.txt

doc/ja/user-guide/filtered_views.txt

doc/ja/user-guide/getting_help.txt

doc/ja/user-guide/hooks.txt

doc/ja/user-guide/http_smart_server.txt

doc/ja/user-guide/images

doc/ja/user-guide/images/workflows_centralized.png

doc/ja/user-guide/images/workflows_centralized.svg

doc/ja/user-guide/images/workflows_gatekeeper.png

doc/ja/user-guide/images/workflows_gatekeeper.svg

doc/ja/user-guide/images/workflows_localcommit.png

doc/ja/user-guide/images/workflows_localcommit.svg

doc/ja/user-guide/images/workflows_peer.png

doc/ja/user-guide/images/workflows_peer.svg

doc/ja/user-guide/images/workflows_pqm.png

doc/ja/user-guide/images/workflows_pqm.svg

doc/ja/user-guide/images/workflows_shared.png

doc/ja/user-guide/images/workflows_shared.svg

doc/ja/user-guide/images/workflows_single.png

doc/ja/user-guide/images/workflows_single.svg

doc/ja/user-guide/index-plain.txt

doc/ja/user-guide/index.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/licence.txt

doc/ja/user-guide/merging_changes.txt

doc/ja/user-guide/organizing_branches.txt

doc/ja/user-guide/organizing_your_workspace.txt

doc/ja/user-guide/part2_intro.txt

doc/ja/user-guide/partner_intro.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/publishing_a_branch.txt

doc/ja/user-guide/recording_changes.txt

doc/ja/user-guide/releasing_a_project.txt

doc/ja/user-guide/resolving_conflicts.txt

doc/ja/user-guide/reusing_a_checkout.txt

doc/ja/user-guide/reviewing_changes.txt

doc/ja/user-guide/sending_changes.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/setting_up_email.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/shelving_changes.txt

doc/ja/user-guide/solo_intro.txt

doc/ja/user-guide/specifying_revisions.txt

doc/ja/user-guide/stacked.txt

doc/ja/user-guide/starting_a_project.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/undoing_mistakes.txt

doc/ja/user-guide/using_aliases.txt

doc/ja/user-guide/using_checkouts.txt

doc/ja/user-guide/using_gatekeepers.txt

doc/ja/user-guide/version_info.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-guide/working_offline_central.txt

doc/ja/user-guide/writing_a_plugin.txt

doc/ja/user-guide/zen.txt

doc/ja/user-reference

doc/ja/user-reference/index.txt

doc/news-template.txt

doc/ru

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_static/ru/Makefile

doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/index.txt

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/index.txt

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index-plain.txt

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

man1

po/ar.po

po/ast.po

po/bs.po

po/bzr.pot

po/ca.po

po/de.po

po/en_AU.po

po/en_GB.po

po/es.po

po/fo.po

po/fr.po

po/gl.po

po/it.po

po/ja.po

po/ms.po

po/nl.po

po/oc.po

po/pl.po

po/pt_BR.po

po/ro.po

po/ru.po

po/sco.po

po/tr.po

po/ug.po

profile_imports.py

tools/__init__.py

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/fixed-in.py

tools/generate_docs.py

tools/generate_release_notes.py

tools/package_docs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-control.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/riodemo.py

tools/rst2html.py

tools/rst2pdf.py

tools/rst2prettyhtml.py

tools/subunit-sum

tools/time_graph.py

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bootstrap.py

tools/win32/build_release.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/py2exe_boot_common.py

tools/win32/run_script.py

tools/win32/start_bzr.bat

files renamed:
doc/developers/HACKING.txt => HACKING

doc/en/release-notes/bzr-2.3.txt => NEWS

bzrlib/doc_generate/autodoc_man.py => bzr_man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/http_utils.py => bzrlib/selftest/HTTPTestUtil.py

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_commands.py => bzrlib/selftest/test_command.py

bzrlib/tests/per_branch/test_parent.py => bzrlib/selftest/test_parent.py

bzrlib/tests/blackbox/test_revision_info.py => bzrlib/selftest/test_revision_info.py

bzrlib/tests/blackbox/test_annotate.py => bzrlib/selftest/testannotate.py

bzrlib/tests/test_api.py => bzrlib/selftest/testapi.py

bzrlib/tests/per_branch/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_config.py => bzrlib/selftest/testconfig.py

bzrlib/tests/test_diff.py => bzrlib/selftest/testdiff.py

bzrlib/tests/test_fetch.py => bzrlib/selftest/testfetch.py

bzrlib/tests/test_gpg.py => bzrlib/selftest/testgpg.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_http.py => bzrlib/selftest/testhttp.py

bzrlib/tests/test_identitymap.py => bzrlib/selftest/testidentitymap.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_log.py => bzrlib/selftest/testlog.py

bzrlib/tests/test_merge.py => bzrlib/selftest/testmerge.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_nonascii.py => bzrlib/selftest/testnonascii.py

bzrlib/tests/test_options.py => bzrlib/selftest/testoptions.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/testplugins.py

bzrlib/tests/test_revision.py => bzrlib/selftest/testrevision.py

bzrlib/tests/test_revisionspec.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/per_repository/test_revision.py => bzrlib/selftest/testrevprops.py

bzrlib/tests/test_sampler.py => bzrlib/selftest/testsampler.py

bzrlib/tests/test_sftp_transport.py => bzrlib/selftest/testsftp.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/test_store.py => bzrlib/selftest/teststore.py

bzrlib/tests/test_testament.py => bzrlib/selftest/testtestament.py

bzrlib/tests/test_trace.py => bzrlib/selftest/testtrace.py

bzrlib/tests/test_transactions.py => bzrlib/selftest/testtransactions.py

bzrlib/tests/test_transport.py => bzrlib/selftest/testtransport.py

bzrlib/tests/test_tsort.py => bzrlib/selftest/testtsort.py

bzrlib/tests/test_workingtree.py => bzrlib/selftest/testworkingtree.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/versioned/__init__.py => bzrlib/store/weave.py

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py => bzrlib/transport/http.py

bzrlib/ui/__init__.py => bzrlib/ui.py

bzrlib/win32utils.py => bzrlib/win32console.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/plugins/weave_fmt/xml4.py => bzrlib/xml4.py

bzrlib/xml8.py => bzrlib/xml5.py

doc/en/tutorials/tutorial.txt => tutorial.txt

files modified:
.bzrignore

INSTALL

Makefile

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/stub_sftp.py

bzrlib/selftest/test_ancestry.py

bzrlib/selftest/test_bad_files.py

bzrlib/selftest/test_commit.py

bzrlib/selftest/test_commit_merge.py

bzrlib/selftest/test_conflicts.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_upgrade.py

bzrlib/selftest/test_weave.py

bzrlib/selftest/test_xml.py

bzrlib/selftest/treeshape.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/testament.py

bzrlib/textinv.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transport/__init__.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/upgrade.py

bzrlib/util/__init__.py

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/weave.py

bzrlib/weavefile.py

bzrlib/workingtree.py

setup.py

tools/capture_tree.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Core compression logic for compressing streams of related files."""

from __future__ import absolute_import

import time

import zlib

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import (

annotate,

config,

debug,

errors,

graph as _mod_graph,

osutils,

pack,

static_tuple,

trace,

tsort,

)

from bzrlib.repofmt import pack_repo

from bzrlib.i18n import gettext

""")

from bzrlib.btree_index import BTreeBuilder

from bzrlib.lru_cache import LRUSizeCache

from bzrlib.versionedfile import (

_KeyRefs,

adapter_registry,

AbsentContentFactory,

ChunkedContentFactory,

FulltextContentFactory,

VersionedFilesWithFallbacks,

)

# Minimum number of uncompressed bytes to try to fetch at once when retrieving

# groupcompress blocks.

BATCH_SIZE = 2**16

# osutils.sha_string('')

_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'

def sort_gc_optimal(parent_map):
    """Return the keys of parent_map in groupcompress order.

    Keys are bucketed by prefix: a plain string key or a length-1 key goes
    into the '' bucket, any other key is bucketed under its first element.
    Buckets are emitted in sorted prefix order; within a bucket the keys
    come out in the reverse of the order produced by tsort.topo_sort().

    :param parent_map: A mapping of key => value, walked via iteritems().
        Each per-prefix sub-mapping is handed to tsort.topo_sort().
    :return: A list of keys
    """
    buckets = {}
    for key, parents in parent_map.iteritems():
        unprefixed = isinstance(key, str) or len(key) == 1
        prefix = '' if unprefixed else key[0]
        buckets.setdefault(prefix, {})[key] = parents
    ordered = []
    for prefix in sorted(buckets):
        in_topo_order = tsort.topo_sort(buckets[prefix])
        ordered.extend(reversed(in_topo_order))
    return ordered

# The max zlib window size is 32kB, so if we set 'max_size' output of the

# decompressor to the requested bytes + 32kB, then we should guarantee

# num_bytes coming out.

_ZLIB_DECOMP_WINDOW = 32*1024

class GroupCompressBlock(object):

"""An object which maintains the internal structure of the compressed data.

This tracks the meta info (start of text, length, type, etc.)

"""

# Group Compress Block v1 Zlib

100

GCB_HEADER = 'gcb1z\n'

101

# Group Compress Block v1 Lzma

102

GCB_LZ_HEADER = 'gcb1l\n'

103

GCB_KNOWN_HEADERS = (GCB_HEADER, GCB_LZ_HEADER)

104

105

def __init__(self):

106

# map by key? or just order in file?

107

self._compressor_name = None

108

self._z_content_chunks = None

109

self._z_content_decompressor = None

110

self._z_content_length = None

111

self._content_length = None

112

self._content = None

113

self._content_chunks = None

114

115

def __len__(self):

116

# This is the maximum number of bytes this object will reference if

117

# everything is decompressed. However, if we decompress less than

118

# everything... (this would cause some problems for LRUSizeCache)

119

return self._content_length + self._z_content_length

120

121

def _ensure_content(self, num_bytes=None):

122

"""Make sure that content has been expanded enough.

123

124

:param num_bytes: Ensure that we have extracted at least num_bytes of

125

content. If None, consume everything

126

"""

127

if self._content_length is None:

128

raise AssertionError('self._content_length should never be None')

129

if num_bytes is None:

130

num_bytes = self._content_length

131

elif (self._content_length is not None

132

and num_bytes > self._content_length):

133

raise AssertionError(

134

'requested num_bytes (%d) > content length (%d)'

135

% (num_bytes, self._content_length))

136

# Expand the content if required

137

if self._content is None:

138

if self._content_chunks is not None:

139

self._content = ''.join(self._content_chunks)

140

self._content_chunks = None

141

if self._content is None:

142

# We join self._z_content_chunks here, because if we are

143

# decompressing, then it is *very* likely that we have a single

144

# chunk

145

if self._z_content_chunks is None:

146

raise AssertionError('No content to decompress')

147

z_content = ''.join(self._z_content_chunks)

148

if z_content == '':

149

self._content = ''

150

elif self._compressor_name == 'lzma':

151

# We don't do partial lzma decomp yet

152

import pylzma

153

self._content = pylzma.decompress(z_content)

154

elif self._compressor_name == 'zlib':

155

# Start a zlib decompressor

156

if num_bytes * 4 > self._content_length * 3:

157

# If we are requesting more that 3/4ths of the content,

158

# just extract the whole thing in a single pass

159

num_bytes = self._content_length

160

self._content = zlib.decompress(z_content)

161

else:

162

self._z_content_decompressor = zlib.decompressobj()

163

# Seed the decompressor with the uncompressed bytes, so

164

# that the rest of the code is simplified

165

self._content = self._z_content_decompressor.decompress(

166

z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

167

if not self._z_content_decompressor.unconsumed_tail:

168

self._z_content_decompressor = None

169

else:

170

raise AssertionError('Unknown compressor: %r'

171

% self._compressor_name)

172

# Any bytes remaining to be decompressed will be in the decompressors

173

# 'unconsumed_tail'

174

175

# Do we have enough bytes already?

176

if len(self._content) >= num_bytes:

177

return

178

# If we got this far, and don't have a decompressor, something is wrong

179

if self._z_content_decompressor is None:

180

raise AssertionError(

181

'No decompressor to decompress %d bytes' % num_bytes)

182

remaining_decomp = self._z_content_decompressor.unconsumed_tail

183

if not remaining_decomp:

184

raise AssertionError('Nothing left to decompress')

185

needed_bytes = num_bytes - len(self._content)

186

# We always set max_size to 32kB over the minimum needed, so that

187

# zlib will give us as much as we really want.

188

# TODO: If this isn't good enough, we could make a loop here,

189

# that keeps expanding the request until we get enough

190

self._content += self._z_content_decompressor.decompress(

191

remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)

192

if len(self._content) < num_bytes:

193

raise AssertionError('%d bytes wanted, only %d available'

194

% (num_bytes, len(self._content)))

195

if not self._z_content_decompressor.unconsumed_tail:

196

# The stream is finished

197

self._z_content_decompressor = None

198

199

def _parse_bytes(self, bytes, pos):

200

"""Read the various lengths from the header.

201

202

This also populates the various 'compressed' buffers.

203

204

:return: The position in bytes just after the last newline

205

"""

206

# At present, we have 2 integers for the compressed and uncompressed

207

# content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid

208

# checking too far, cap the search to 14 bytes.

209

pos2 = bytes.index('\n', pos, pos + 14)

210

self._z_content_length = int(bytes[pos:pos2])

211

pos = pos2 + 1

212

pos2 = bytes.index('\n', pos, pos + 14)

213

self._content_length = int(bytes[pos:pos2])

214

pos = pos2 + 1

215

if len(bytes) != (pos + self._z_content_length):

216

# XXX: Define some GCCorrupt error ?

217

raise AssertionError('Invalid bytes: (%d) != %d + %d' %

218

(len(bytes), pos, self._z_content_length))

219

self._z_content_chunks = (bytes[pos:],)

220

221

@property

222

def _z_content(self):

223

"""Return z_content_chunks as a simple string.

224

225

Meant only to be used by the test suite.

226

"""

227

if self._z_content_chunks is not None:

228

return ''.join(self._z_content_chunks)

229

return None

230

231

@classmethod

232

def from_bytes(cls, bytes):

233

out = cls()

234

if bytes[:6] not in cls.GCB_KNOWN_HEADERS:

235

raise ValueError('bytes did not start with any of %r'

236

% (cls.GCB_KNOWN_HEADERS,))

237

# XXX: why not testing the whole header ?

238

if bytes[4] == 'z':

239

out._compressor_name = 'zlib'

240

elif bytes[4] == 'l':

241

out._compressor_name = 'lzma'

242

else:

243

raise ValueError('unknown compressor: %r' % (bytes,))

244

out._parse_bytes(bytes, 6)

245

return out

246

247

def extract(self, key, start, end, sha1=None):

248

"""Extract the text for a specific key.

249

250

:param key: The label used for this content

251

:param sha1: TODO (should we validate only when sha1 is supplied?)

252

:return: The bytes for the content

253

"""

254

if start == end == 0:

255

return ''

256

self._ensure_content(end)

257

# The bytes are 'f' or 'd' for the type, then a variable-length

258

# base128 integer for the content size, then the actual content

259

# We know that the variable-length integer won't be longer than 5

260

# bytes (it takes 5 bytes to encode 2^32)

261

c = self._content[start]

262

if c == 'f':

263

type = 'fulltext'

264

else:

265

if c != 'd':

266

raise ValueError('Unknown content control code: %s'

267

% (c,))

268

type = 'delta'

269

content_len, len_len = decode_base128_int(

270

self._content[start + 1:start + 6])

271

content_start = start + 1 + len_len

272

if end != content_start + content_len:

273

raise ValueError('end != len according to field header'

274

' %s != %s' % (end, content_start + content_len))

275

if c == 'f':

276

bytes = self._content[content_start:end]

277

elif c == 'd':

278

bytes = apply_delta_to_source(self._content, content_start, end)

279

return bytes

280

281

def set_chunked_content(self, content_chunks, length):
    """Set the content of this block to the given chunks.

    :param content_chunks: The uncompressed content, as a sequence of
        strings.
    :param length: The total length of the content; it is stored as given
        and not recomputed from the chunks.
    """
    # Joining the chunks up front was tried (for content <10MiB the extra
    # memory does not matter), but timing showed 18s => 17.9s for repacking
    # 1k revs of mysql, which is below the noise margin. So the chunks are
    # kept as they are.
    self._content_chunks = content_chunks
    self._content_length = length
    # Drop any joined or compressed form left over from earlier content.
    self._content = None
    self._z_content_chunks = None

292

293

def set_content(self, content):
    """Set the content of this block.

    :param content: The complete uncompressed content as a single string.
    """
    self._content_length = len(content)
    self._content = content
    # Discard chunks from an earlier set_chunked_content() call. Otherwise
    # _create_z_content(), which prefers _content_chunks over _content,
    # would compress the stale chunks instead of this content.
    self._content_chunks = None
    # The compressed form is rebuilt on demand.
    self._z_content_chunks = None

298

299

def _create_z_content_from_chunks(self, chunks):
    """Compress ``chunks`` with zlib and store the result on this block.

    Sets ``_z_content_chunks`` to the non-empty compressed chunks and
    ``_z_content_length`` to their total length.

    :param chunks: An iterable of strings holding the uncompressed content.
    """
    compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
    # Peak in this point is 1 fulltext, 1 compressed text, + zlib overhead
    # (measured peak is maybe 30MB over the above...)
    # Build a real list: the flush() output is appended below, which does
    # not work on the iterator that map() returns on newer Pythons.
    compressed_chunks = [compressor.compress(chunk) for chunk in chunks]
    compressed_chunks.append(compressor.flush())
    # Ignore empty chunks
    self._z_content_chunks = [c for c in compressed_chunks if c]
    self._z_content_length = sum(len(c) for c in self._z_content_chunks)

308

309

def _create_z_content(self):
    """Make sure the compressed form of the content exists.

    Does nothing when ``_z_content_chunks`` is already set. Otherwise the
    content chunks are compressed if present, else the single ``_content``
    string is.
    """
    if self._z_content_chunks is None:
        if self._content_chunks is None:
            source_chunks = (self._content,)
        else:
            source_chunks = self._content_chunks
        self._create_z_content_from_chunks(source_chunks)

317

318

def to_chunks(self):
    """Create the byte stream as a series of 'chunks'

    :return: ``(total_len, chunks)``. The first chunk is the block header
        (``GCB_HEADER``, then the compressed and uncompressed lengths, each
        on its own line); the compressed content chunks follow.
        ``total_len`` is the combined length of all chunks.
    """
    self._create_z_content()
    header_chunk = '%s%d\n%d\n' % (
        self.GCB_HEADER, self._z_content_length, self._content_length)
    chunks = [header_chunk]
    chunks += self._z_content_chunks
    total_len = sum(len(chunk) for chunk in chunks)
    return total_len, chunks

328

329

def to_bytes(self):
    """Encode the information into a byte stream.

    :return: The chunks produced by ``to_chunks()`` joined into one string.
    """
    # The total length is not needed here, only the chunks.
    chunks = self.to_chunks()[1]
    return ''.join(chunks)

333

334

def _dump(self, include_text=False):
    """Take this block, and spit out a human-readable structure.

    :param include_text: When true, fulltext records, copy instructions and
        insert instructions also carry the text they refer to. When false,
        fulltexts and copies omit it and inserts carry ''.
    :return: A dump of the given block. The layout is something like:
        [('f', length), ('d', delta_length, text_length, [delta_info])]
        delta_info := [('i', num_bytes, text), ('c', offset, num_bytes),
        ...]
    :raises ValueError: If a record has an unknown kind, claims more bytes
        than the block holds, or a delta's instructions do not add up.
    """
    self._ensure_content()
    content = self._content
    total_length = self._content_length
    records = []
    cursor = 0
    while cursor < total_length:
        record_start = cursor
        kind = content[record_start]
        if kind not in ('f', 'd'):
            raise ValueError('invalid kind character: %r' % (kind,))
        # The record length is a base128 integer following the kind byte.
        length_at = record_start + 1
        content_len, len_len = decode_base128_int(
            content[length_at:length_at + 5])
        body_start = length_at + len_len
        body_end = body_start + content_len
        if body_end > total_length:
            raise ValueError('invalid content_len %d for record @ pos %d'
                             % (content_len, record_start))
        if kind == 'f':  # Fulltext
            if include_text:
                records.append(('f', content_len,
                                content[body_start:body_end]))
            else:
                records.append(('f', content_len))
        else:  # Delta
            delta_content = content[body_start:body_end]
            instructions = []
            # The first entry in a delta is the decompressed length
            decomp_len, delta_pos = decode_base128_int(delta_content)
            records.append(('d', content_len, decomp_len, instructions))
            measured_len = 0
            while delta_pos < content_len:
                opcode = ord(delta_content[delta_pos])
                delta_pos += 1
                if opcode & 0x80:  # Copy
                    offset, length, delta_pos = decode_copy_instruction(
                        delta_content, opcode, delta_pos)
                    if include_text:
                        copied = content[offset:offset + length]
                        instructions.append(('c', offset, length, copied))
                    else:
                        instructions.append(('c', offset, length))
                    measured_len += length
                else:  # Insert: the opcode is the number of literal bytes
                    if include_text:
                        inserted = delta_content[delta_pos:delta_pos + opcode]
                    else:
                        inserted = ''
                    instructions.append(('i', opcode, inserted))
                    measured_len += opcode
                    delta_pos += opcode
            if delta_pos != content_len:
                raise ValueError('Delta consumed a bad number of bytes:'
                                 ' %d != %d' % (delta_pos, content_len))
            if measured_len != decomp_len:
                raise ValueError('Delta claimed fulltext was %d bytes, but'
                                 ' extraction resulted in %d bytes'
                                 % (decomp_len, measured_len))
        cursor = body_end
    return records

401

402

403

class _LazyGroupCompressFactory(object):
    """Yield content from a GroupCompressBlock on demand."""

    def __init__(self, key, parents, manager, start, end, first):
        """Create a _LazyGroupCompressFactory

        :param key: The key of just this record
        :param parents: The parents of this key (possibly None)
        :param manager: The manager object that owns the block; it must
            provide ``_wire_bytes()``, ``_prepare_for_extract()`` and a
            ``_block`` attribute.
        :param start: Offset of the first byte for this record in the
            uncompressed content
        :param end: Offset of the byte just after the end of this record
            (ie, bytes = content[start:end])
        :param first: Is this the first Factory for the given block?
        """
        self.key = key
        self.parents = parents
        self.sha1 = None
        # Note: This attribute coupled with Manager._factories creates a
        #       reference cycle. Perhaps we would rather use a weakref(), or
        #       find an appropriate time to release the ref. After the first
        #       get_bytes_as call? After Manager.get_record_stream() returns
        #       the object?
        self._manager = manager
        self._bytes = None
        # Only the first factory of a block carries the block itself; the
        # others are references to it.
        if first:
            self.storage_kind = 'groupcompress-block'
        else:
            self.storage_kind = 'groupcompress-block-ref'
        self._first = first
        self._start = start
        self._end = end

    def __repr__(self):
        description = (self.__class__.__name__, self.key, self._first)
        return '%s(%s, first=%s)' % description

    def get_bytes_as(self, storage_kind):
        """Return this record in the requested representation.

        :param storage_kind: This factory's own ``storage_kind``,
            'fulltext' or 'chunked'.
        :return: For the factory's own storage kind, the manager's wire
            bytes if this is the first factory, else ''. For 'fulltext' the
            extracted text; for 'chunked' a one-element list holding it.
        :raises errors.DecompressCorruption: If preparing the block for
            extraction raises ``zlib.error``.
        :raises errors.UnavailableRepresentation: For any other kind.
        """
        if storage_kind == self.storage_kind:
            if not self._first:
                return ''
            # wire bytes, something...
            return self._manager._wire_bytes()
        if storage_kind not in ('fulltext', 'chunked'):
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                                                   self.storage_kind)
        if self._bytes is None:
            # Grab and cache the raw bytes for this entry.
            try:
                self._manager._prepare_for_extract()
            except zlib.error as value:
                raise errors.DecompressCorruption("zlib: " + str(value))
            self._bytes = self._manager._block.extract(
                self.key, self._start, self._end)
            # There are code paths that first extract as fulltext, and then
            # extract as storage_kind (smart fetch). So we don't break the
            # refcycle here, but instead in manager.get_record_stream()
        if storage_kind == 'chunked':
            return [self._bytes]
        return self._bytes

466

467

468

class _LazyGroupContentManager(object):

469

"""This manages a group of _LazyGroupCompressFactory objects."""

470

471

_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of

472

# current size, and still be considered

473

# resuable

474

_full_block_size = 4*1024*1024

475

_full_mixed_block_size = 2*1024*1024

476

_full_enough_block_size = 3*1024*1024 # size at which we won't repack

477

_full_enough_mixed_block_size = 2*768*1024 # 1.5MB

478

479

def __init__(self, block, get_compressor_settings=None):

480

self._block = block

481

# We need to preserve the ordering

482

self._factories = []

483

self._last_byte = 0

484

self._get_settings = get_compressor_settings

485

self._compressor_settings = None

486

487

def _get_compressor_settings(self):

488

if self._compressor_settings is not None:

489

return self._compressor_settings

490

settings = None

491

if self._get_settings is not None:

492

settings = self._get_settings()

493

if settings is None:

494

vf = GroupCompressVersionedFiles

495

settings = vf._DEFAULT_COMPRESSOR_SETTINGS

496

self._compressor_settings = settings

497

return self._compressor_settings

498

499

def add_factory(self, key, parents, start, end):

500

if not self._factories:

501

first = True

502

else:

503

first = False

504

# Note that this creates a reference cycle....

505

factory = _LazyGroupCompressFactory(key, parents, self,

506

start, end, first=first)

507

# max() works here, but as a function call, doing a compare seems to be

508

# significantly faster, timeit says 250ms for max() and 100ms for the

509

# comparison

510

if end > self._last_byte:

511

self._last_byte = end

512

self._factories.append(factory)

513

514

def get_record_stream(self):

515

"""Get a record for all keys added so far."""

516

for factory in self._factories:

517

yield factory

518

# Break the ref-cycle

519

factory._bytes = None

520

factory._manager = None

521

# TODO: Consider setting self._factories = None after the above loop,

522

# as it will break the reference cycle

523

524

def _trim_block(self, last_byte):

525

"""Create a new GroupCompressBlock, with just some of the content."""

526

# None of the factories need to be adjusted, because the content is

527

# located in an identical place. Just that some of the unreferenced

528

# trailing bytes are stripped

529

trace.mutter('stripping trailing bytes from groupcompress block'

530

' %d => %d', self._block._content_length, last_byte)

531

new_block = GroupCompressBlock()

532

self._block._ensure_content(last_byte)

533

new_block.set_content(self._block._content[:last_byte])

534

self._block = new_block

535

536

def _make_group_compressor(self):

537

return GroupCompressor(self._get_compressor_settings())

538

539

def _rebuild_block(self):

540

"""Create a new GroupCompressBlock with only the referenced texts."""

541

compressor = self._make_group_compressor()

542

tstart = time.time()

543

old_length = self._block._content_length

544

end_point = 0

545

for factory in self._factories:

546

bytes = factory.get_bytes_as('fulltext')

547

(found_sha1, start_point, end_point,

548

type) = compressor.compress(factory.key, bytes, factory.sha1)

549

# Now update this factory with the new offsets, etc

550

factory.sha1 = found_sha1

551

factory._start = start_point

552

factory._end = end_point

553

self._last_byte = end_point

554

new_block = compressor.flush()

555

# TODO: Should we check that new_block really *is* smaller than the old

556

# block? It seems hard to come up with a method that it would

557

# expand, since we do full compression again. Perhaps based on a

558

# request that ends up poorly ordered?

559

# TODO: If the content would have expanded, then we would want to

560

# handle a case where we need to split the block.

561

# Now that we have a user-tweakable option

562

# (max_bytes_to_index), it is possible that one person set it

563

# to a very low value, causing poor compression.

564

delta = time.time() - tstart

565

self._block = new_block

566

trace.mutter('creating new compressed block on-the-fly in %.3fs'

567

' %d bytes => %d bytes', delta, old_length,

568

self._block._content_length)

569

570

def _prepare_for_extract(self):

571

"""A _LazyGroupCompressFactory is about to extract to fulltext."""

572

# We expect that if one child is going to fulltext, all will be. This

573

# helps prevent all of them from extracting a small amount at a time.

574

# Which in itself isn't terribly expensive, but resizing 2MB 32kB at a

575

# time (self._block._content) is a little expensive.

576

self._block._ensure_content(self._last_byte)

577

578

def _check_rebuild_action(self):

579

"""Check to see if our block should be repacked."""

580

total_bytes_used = 0

581

last_byte_used = 0

582

for factory in self._factories:

583

total_bytes_used += factory._end - factory._start

584

if last_byte_used < factory._end:

585

last_byte_used = factory._end

586

# If we are using more than half of the bytes from the block, we have

587

# nothing else to check

588

if total_bytes_used * 2 >= self._block._content_length:

589

return None, last_byte_used, total_bytes_used

590

# We are using less than 50% of the content. Is the content we are

591

# using at the beginning of the block? If so, we can just trim the

592

# tail, rather than rebuilding from scratch.

593

if total_bytes_used * 2 > last_byte_used:

594

return 'trim', last_byte_used, total_bytes_used

595

596

# We are using a small amount of the data, and it isn't just packed

597

# nicely at the front, so rebuild the content.

598

# Note: This would be *nicer* as a strip-data-from-group, rather than

599

# building it up again from scratch

600

# It might be reasonable to consider the fulltext sizes for

601

# different bits when deciding this, too. As you may have a small

602

# fulltext, and a trivial delta, and you are just trading around

603

# for another fulltext. If we do a simple 'prune' you may end up

604

# expanding many deltas into fulltexts, as well.

605

# If we build a cheap enough 'strip', then we could try a strip,

606

# if that expands the content, we then rebuild.

607

return 'rebuild', last_byte_used, total_bytes_used

608

609

def check_is_well_utilized(self):

610

"""Is the current block considered 'well utilized'?

611

612

This heuristic asks if the current block considers itself to be a fully

613

developed group, rather than just a loose collection of data.

614

"""

615

if len(self._factories) == 1:

616

# A block of length 1 could be improved by combining with other

617

# groups - don't look deeper. Even larger than max size groups

618

# could compress well with adjacent versions of the same thing.

619

return False

620

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

621

block_size = self._block._content_length

622

if total_bytes_used < block_size * self._max_cut_fraction:

623

# This block wants to trim itself small enough that we want to

624

# consider it under-utilized.

625

return False

626

# TODO: This code is meant to be the twin of _insert_record_stream's

627

# 'start_new_block' logic. It would probably be better to factor

628

# out that logic into a shared location, so that it stays

629

# together better

630

# We currently assume a block is properly utilized whenever it is >75%

631

# of the size of a 'full' block. In normal operation, a block is

632

# considered full when it hits 4MB of same-file content. So any block

633

# >3MB is 'full enough'.

634

# The only time this isn't true is when a given block has large-object

635

# content. (a single file >4MB, etc.)

636

# Under these circumstances, we allow a block to grow to

637

# 2 x largest_content. Which means that if a given block had a large

638

# object, it may actually be under-utilized. However, given that this

639

# is 'pack-on-the-fly' it is probably reasonable to not repack large

640

# content blobs on-the-fly. Note that because we return False for all

641

# 1-item blobs, we will repack them; we may wish to reevaluate our

642

# treatment of large object blobs in the future.

643

if block_size >= self._full_enough_block_size:

644

return True

645

# If a block is <3MB, it still may be considered 'full' if it contains

646

# mixed content. The current rule is 2MB of mixed content is considered

647

# full. So check to see if this block contains mixed content, and

648

# set the threshold appropriately.

649

common_prefix = None

650

for factory in self._factories:

651

prefix = factory.key[:-1]

652

if common_prefix is None:

653

common_prefix = prefix

654

elif prefix != common_prefix:

655

# Mixed content, check the size appropriately

656

if block_size >= self._full_enough_mixed_block_size:

657

return True

658

break

659

# The content failed both the mixed check and the single-content check

660

# so obviously it is not fully utilized

661

# TODO: there is one other constraint that isn't being checked

662

# namely, that the entries in the block are in the appropriate

663

# order. For example, you could insert the entries in exactly

664

# reverse groupcompress order, and we would think that is ok.

665

# (all the right objects are in one group, and it is fully

666

# utilized, etc.) For now, we assume that case is rare,

667

# especially since we should always fetch in 'groupcompress'

668

# order.

669

return False

670

671

def _check_rebuild_block(self):

672

action, last_byte_used, total_bytes_used = self._check_rebuild_action()

673

if action is None:

674

return

675

if action == 'trim':

676

self._trim_block(last_byte_used)

677

elif action == 'rebuild':

678

self._rebuild_block()

679

else:

680

raise ValueError('unknown rebuild action: %r' % (action,))

681

682

def _wire_bytes(self):

683

"""Return a byte stream suitable for transmitting over the wire."""

684

self._check_rebuild_block()

685

# The outer block starts with:

686

# 'groupcompress-block\n'

687

# <length of compressed key info>\n

688

# <length of uncompressed info>\n

689

# <length of gc block>\n

690

# <header bytes>

691

# <gc-block>

692

lines = ['groupcompress-block\n']

693

# The minimal info we need is the key, the start offset, and the

694

# parents. The length and type are encoded in the record itself.

695

# However, passing in the other bits makes it easier. The list of

696

# keys, and the start offset, the length

697

# 1 line key

698

# 1 line with parents, '' for ()

699

# 1 line for start offset

700

# 1 line for end byte

701

header_lines = []

702

for factory in self._factories:

703

key_bytes = '\x00'.join(factory.key)

704

parents = factory.parents

705

if parents is None:

706

parent_bytes = 'None:'

707

else:

708

parent_bytes = '\t'.join('\x00'.join(key) for key in parents)

709

record_header = '%s\n%s\n%d\n%d\n' % (

710

key_bytes, parent_bytes, factory._start, factory._end)

711

header_lines.append(record_header)

712

# TODO: Can we break the refcycle at this point and set

713

# factory._manager = None?

714

header_bytes = ''.join(header_lines)

715

del header_lines

716

header_bytes_len = len(header_bytes)

717

z_header_bytes = zlib.compress(header_bytes)

718

del header_bytes

719

z_header_bytes_len = len(z_header_bytes)

720

block_bytes_len, block_chunks = self._block.to_chunks()

721

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

722

block_bytes_len))

723

lines.append(z_header_bytes)

724

lines.extend(block_chunks)

725

del z_header_bytes, block_chunks

726

# TODO: This is a point where we will double the memory consumption. To

727

# avoid this, we probably have to switch to a 'chunked' api

728

return ''.join(lines)

729

730

@classmethod

731

def from_bytes(cls, bytes):

732

# TODO: This does extra string copying, probably better to do it a

733

# different way. At a minimum this creates 2 copies of the

734

# compressed content

735

(storage_kind, z_header_len, header_len,

736

block_len, rest) = bytes.split('\n', 4)

737

del bytes

738

if storage_kind != 'groupcompress-block':

739

raise ValueError('Unknown storage kind: %s' % (storage_kind,))

740

z_header_len = int(z_header_len)

741

if len(rest) < z_header_len:

742

raise ValueError('Compressed header len shorter than all bytes')

743

z_header = rest[:z_header_len]

744

header_len = int(header_len)

745

header = zlib.decompress(z_header)

746

if len(header) != header_len:

747

raise ValueError('invalid length for decompressed bytes')

748

del z_header

749

block_len = int(block_len)

750

if len(rest) != z_header_len + block_len:

751

raise ValueError('Invalid length for block')

752

block_bytes = rest[z_header_len:]

753

del rest

754

# So now we have a valid GCB, we just need to parse the factories that

755

# were sent to us

756

header_lines = header.split('\n')

757

del header

758

last = header_lines.pop()

759

if last != '':

760

raise ValueError('header lines did not end with a trailing'

761

' newline')

762

if len(header_lines) % 4 != 0:

763

raise ValueError('The header was not an even multiple of 4 lines')

764

block = GroupCompressBlock.from_bytes(block_bytes)

765

del block_bytes

766

result = cls(block)

767

for start in xrange(0, len(header_lines), 4):

768

# intern()?

769

key = tuple(header_lines[start].split('\x00'))

770

parents_line = header_lines[start+1]

771

if parents_line == 'None:':

772

parents = None

773

else:

774

parents = tuple([tuple(segment.split('\x00'))

775

for segment in parents_line.split('\t')

776

if segment])

777

start_offset = int(header_lines[start+2])

778

end_offset = int(header_lines[start+3])

779

result.add_factory(key, parents, start_offset, end_offset)

780

return result

781

782

783

def network_block_to_records(storage_kind, bytes, line_end):
    """Turn a serialised 'groupcompress-block' into a stream of records.

    :param storage_kind: Must be 'groupcompress-block'.
    :param bytes: The serialised block, as parsed by
        ``_LazyGroupContentManager.from_bytes``.
    :param line_end: Unused by this function.
    :return: The record stream of the manager built from ``bytes``.
    :raises ValueError: If ``storage_kind`` is anything else.
    """
    if storage_kind == 'groupcompress-block':
        return _LazyGroupContentManager.from_bytes(bytes).get_record_stream()
    raise ValueError('Unknown storage kind: %s' % (storage_kind,))

788

789

790

class _CommonGroupCompressor(object):
    """Shared bookkeeping for the group compressor implementations.

    Subclasses provide ``_compress`` and set ``_delta_index``.
    """

    def __init__(self, settings=None):
        """Create a GroupCompressor.

        :param settings: Optional dict of compressor settings; an empty
            dict is used when None.
        """
        self.chunks = []
        self._last = None
        self.endpoint = 0
        self.input_bytes = 0
        self.labels_deltas = {}
        self._delta_index = None  # Set by the children
        self._block = GroupCompressBlock()
        if settings is None:
            self._settings = {}
        else:
            self._settings = settings

    def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output
            for identification of the text during decompression. If the last
            element is 'None' it is replaced with the sha1 of the text -
            e.g. sha1:xxxxxxx.
        :param bytes: The bytes to be compressed
        :param expected_sha: If non-None, the sha the lines are believed to
            have. It is trusted as given; the sha is only computed when
            this is None.
        :param nostore_sha: If the computed sha1 sum matches, we will raise
            ExistingContent rather than adding the text.
        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.

        :return: The sha1 of lines, the start and end offsets in the delta, and
            the type ('fulltext' or 'delta').

        :seealso VersionedFiles.add_lines:
        """
        if not bytes:  # empty, like a dir entry, etc
            if nostore_sha == _null_sha1:
                raise errors.ExistingContent()
            return _null_sha1, 0, 0, 'fulltext'
        # we assume someone knew what they were doing when they passed it in
        if expected_sha is not None:
            sha1 = expected_sha
        else:
            sha1 = osutils.sha_string(bytes)
        if nostore_sha is not None:
            if sha1 == nostore_sha:
                raise errors.ExistingContent()
        if key[-1] is None:
            key = key[:-1] + ('sha1:' + sha1,)

        # A delta is only worthwhile when it is at most half the size of the
        # text. Floor division keeps the limit an integer regardless of
        # which division semantics are in effect.
        start, end, type = self._compress(key, bytes, len(bytes) // 2, soft)
        return sha1, start, end, type

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """Compress lines with label key.

        :param key: A key tuple. It is stored in the output for identification
            of the text during decompression.

        :param bytes: The bytes to be compressed

        :param max_delta_size: The size above which we issue a fulltext instead
            of a delta.

        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.

        :return: The start and end offsets in the delta, and the type
            ('fulltext' or 'delta').
        """
        raise NotImplementedError(self._compress)

    def extract(self, key):
        """Extract a key previously added to the compressor.

        :param key: The key to extract.
        :return: A ``(bytes, sha1)`` tuple: the text and its sha1.
        :raises ValueError: If the stored record has an unknown kind, or its
            length header disagrees with the number of stored bytes.
        """
        (start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]
        stored_bytes = ''.join(self.chunks[start_chunk:end_chunk])
        kind = stored_bytes[0]
        if kind == 'f':
            fulltext_len, offset = decode_base128_int(stored_bytes[1:10])
            data_len = fulltext_len + 1 + offset
            if data_len != len(stored_bytes):
                raise ValueError('Index claimed fulltext len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored_bytes), data_len))
            bytes = stored_bytes[offset + 1:]
        else:
            if kind != 'd':
                raise ValueError('Unknown content kind, bytes claim %s'
                                 % (kind,))
            # XXX: This is inefficient at best
            # The delta is applied against everything stored before it.
            source = ''.join(self.chunks[:start_chunk])
            delta_len, offset = decode_base128_int(stored_bytes[1:10])
            data_len = delta_len + 1 + offset
            if data_len != len(stored_bytes):
                raise ValueError('Index claimed delta len, but stored bytes'
                                 ' claim %s != %s'
                                 % (len(stored_bytes), data_len))
            bytes = apply_delta(source, stored_bytes[offset + 1:])
        bytes_sha1 = osutils.sha_string(bytes)
        return bytes, bytes_sha1

    def flush(self):
        """Finish this group, creating a formatted stream.

        After calling this, the compressor should no longer be used

        :return: The block, holding the chunks accumulated so far.
        """
        self._block.set_chunked_content(self.chunks, self.endpoint)
        self.chunks = None
        self._delta_index = None
        return self._block

    def pop_last(self):
        """Call this if you want to 'revoke' the last compression.

        After this, the data structures will be rolled back, but you cannot do
        more compression.
        """
        self._delta_index = None
        # _last is (chunk index, endpoint) as they were before the last text.
        del self.chunks[self._last[0]:]
        self.endpoint = self._last[1]
        self._last = None

    def ratio(self):
        """Return the overall compression ratio.

        :raises ZeroDivisionError: If nothing has been output yet
            (``endpoint`` is 0).
        """
        return float(self.input_bytes) / float(self.endpoint)

921

922

923

class PythonGroupCompressor(_CommonGroupCompressor):
    """Group compressor built on LinesDeltaIndex."""

    def __init__(self, settings=None):
        """Create a GroupCompressor.

        Used only if the pyrex version is not available.
        """
        super(PythonGroupCompressor, self).__init__(settings)
        self._delta_index = LinesDeltaIndex([])
        # The actual content is managed by LinesDeltaIndex
        self.chunks = self._delta_index.lines

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        input_len = len(bytes)
        new_lines = osutils.split_lines(bytes)
        out_lines, index_lines = self._delta_index.make_delta(
            new_lines, bytes_length=input_len, soft=soft)
        delta_length = sum(len(line) for line in out_lines)
        if delta_length > max_delta_size:
            # The delta is larger than we are willing to accept, insert a
            # fulltext instead
            kind = 'fulltext'
            out_lines = ['f', encode_base128_int(input_len)]
            out_lines += new_lines
            # The two header entries are flagged False, the text lines True.
            index_lines = [False, False] + [True] * len(new_lines)
        else:
            # this is a worthy delta, output it
            kind = 'delta'
            # The first two entries of the delta are its header: the kind
            # marker and the encoded length.
            out_lines[0] = 'd'
            out_lines[1] = encode_base128_int(delta_length)
        # Remember where things stood before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        self._last = (chunk_start, start)
        self._delta_index.extend_lines(out_lines, index_lines)
        self.endpoint = self._delta_index.endpoint
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        return start, self.endpoint, kind

966

967

968

class PyrexGroupCompressor(_CommonGroupCompressor):
    """Produce a serialised group of compressed texts.

    It contains code very similar to SequenceMatcher because of having a similar
    task. However some key differences apply:

    * there is no junk, we want a minimal edit not a human readable diff.
    * we don't filter very common lines (because we don't know where a good
      range will start, and after the first text we want to be emitting minimal
      edits only.
    * we chain the left side, not the right side
    * we incrementally update the adjacency matrix as new lines are provided.
    * we look for matches in all of the left side, so the routine which does
      the analogous task of find_longest_match does not need to filter on the
      left side.
    """

    def __init__(self, settings=None):
        """Create the compressor and its DeltaIndex.

        :param settings: Optional settings dict; 'max_bytes_to_index'
            (default 0) is passed on to the DeltaIndex.
        """
        super(PyrexGroupCompressor, self).__init__(settings)
        max_bytes_to_index = self._settings.get('max_bytes_to_index', 0)
        self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index)

    def _compress(self, key, bytes, max_delta_size, soft=False):
        """see _CommonGroupCompressor._compress"""
        input_len = len(bytes)
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed, we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (the delta record itself will store the
        # total length for the expanded record)
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        # new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
            # No acceptable delta was produced: store the whole text.
            type = 'fulltext'
            enc_length = encode_base128_int(len(bytes))
            len_mini_header = 1 + len(enc_length)
            self._delta_index.add_source(bytes, len_mini_header)
            new_chunks = ['f', enc_length, bytes]
        else:
            type = 'delta'
            enc_length = encode_base128_int(len(delta))
            len_mini_header = 1 + len(enc_length)
            new_chunks = ['d', enc_length, delta]
            self._delta_index.add_delta_source(delta, len_mini_header)
        # Before insertion
        start = self.endpoint
        chunk_start = len(self.chunks)
        # Now output these bytes
        self._output_chunks(new_chunks)
        self.input_bytes += input_len
        chunk_end = len(self.chunks)
        self.labels_deltas[key] = (start, chunk_start,
                                   self.endpoint, chunk_end)
        if self._delta_index._source_offset != self.endpoint:
            # The leading space on the second literal matters: without it
            # the message read "out of syncwith the output lines".
            raise AssertionError('the delta index is out of sync'
                ' with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
        return start, self.endpoint, type

    def _output_chunks(self, new_chunks):
        """Output some chunks.

        Records the pre-insertion state in ``_last`` (for ``pop_last``),
        appends the chunks and advances ``endpoint`` by their total length.

        :param new_chunks: The chunks to output.
        """
        self._last = (len(self.chunks), self.endpoint)
        endpoint = self.endpoint
        self.chunks.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
        self.endpoint = endpoint

1044

1045

1046

def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
    """Build a factory that creates pack based groupcompress objects.

    This is only functional enough to run interface tests, it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    :param inconsistency_fatal: Handed through unchanged to _GCGraphIndex.
    :return: A callable taking a transport and returning a
        GroupCompressVersionedFiles writing to 'newpack' on that transport.
        The open stream and container writer are attached to the result as
        its ``stream`` and ``writer`` attributes.
    """
    def factory(transport):
        # One reference list (the parents) when a graph is stored, none
        # otherwise.
        if graph:
            ref_length = 1
        else:
            ref_length = 0
        graph_index = BTreeBuilder(reference_lists=ref_length,
                                   key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _GCGraphIndex(graph_index, lambda:True, parents=graph,
                              add_callback=graph_index.add_nodes,
                              inconsistency_fatal=inconsistency_fatal)
        access = pack_repo._DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
        versioned_files = GroupCompressVersionedFiles(index, access, delta)
        # Kept on the result so that the caller can finish and close them.
        versioned_files.stream = stream
        versioned_files.writer = writer
        return versioned_files
    return factory

1076

1077

1078

def cleanup_pack_group(versioned_files):
    """End the pack container and close the stream of versioned_files.

    :param versioned_files: An object carrying ``writer`` and ``stream``
        attributes; ``writer.end()`` is called first, then
        ``stream.close()``.
    """
    try:
        versioned_files.writer.end()
    finally:
        # Close the stream even when ending the container fails, so the
        # underlying stream is never left open; the error still propagates.
        versioned_files.stream.close()

1081

1082

1083

class _BatchingBlockFetcher(object):
    """Accumulate keys and fetch their group compress blocks in batches.

    :ivar total_bytes: int of expected number of bytes needed to fetch the
        currently pending batch.
    """

    def __init__(self, gcvf, locations, get_compressor_settings=None):
        """Create a fetcher.

        :param gcvf: The object whose _group_cache and _get_blocks are used.
        :param locations: Mapping of key to a 4-tuple whose first item is the
            index memo and whose third item is the parents.
        :param get_compressor_settings: Passed on to each
            _LazyGroupContentManager that is created.
        """
        self.gcvf = gcvf
        self.locations = locations
        self._get_compressor_settings = get_compressor_settings
        # The batch that is currently being accumulated.
        self.keys = []
        self.batch_memos = {}
        self.memos_to_get = []
        self.total_bytes = 0
        # Carried over from one yield_factories call to the next.
        self.last_read_memo = None
        self.manager = None

    def add_key(self, key):
        """Add another key to fetch.

        :return: The estimated number of bytes needed to fetch the batch so
            far.
        """
        self.keys.append(key)
        index_memo, _, _, _ = self.locations[key]
        read_memo = index_memo[:3]
        if read_memo in self.batch_memos:
            # Nothing new to fetch: the block is already part of the batch.
            return self.total_bytes
        try:
            cached_block = self.gcvf._group_cache[read_memo]
        except KeyError:
            # Neither batched nor cached, so it will have to be fetched.
            self.batch_memos[read_memo] = None
            self.memos_to_get.append(read_memo)
            self.total_bytes += read_memo[2]
        else:
            # Cached but new to the batch. Hold our own reference, since the
            # block could drop out of _group_cache before the batch is
            # processed.
            self.batch_memos[read_memo] = cached_block
        return self.total_bytes

    def _flush_manager(self):
        """Yield the current manager's records, then forget the manager."""
        manager = self.manager
        if manager is None:
            return
        for factory in manager.get_record_stream():
            yield factory
        self.manager = None
        self.last_read_memo = None

    def yield_factories(self, full_flush=False):
        """Yield factories for keys added since the last yield.  They will be
        returned in the order they were added via add_key.

        :param full_flush: by default, some results may not be returned in case
            they can be part of the next batch.  If full_flush is True, then
            all results are returned.
        """
        if self.manager is None and not self.keys:
            return
        # Fetch all memos in this batch.
        blocks = self.gcvf._get_blocks(self.memos_to_get)
        # Memos still to be pulled from 'blocks'; the next one expected is at
        # the end of the list.
        pending_memos = list(reversed(self.memos_to_get))
        for key in self.keys:
            index_memo, _, parents, _ = self.locations[key]
            read_memo = index_memo[:3]
            if self.last_read_memo != read_memo:
                # A different block begins here, so everything gathered for
                # the previous manager can be yielded now.
                for factory in self._flush_manager():
                    yield factory
                if pending_memos and pending_memos[-1] == read_memo:
                    # The next block from _get_blocks is the one we need.
                    block_read_memo, block = next(blocks)
                    if block_read_memo != read_memo:
                        raise AssertionError(
                            "block_read_memo out of sync with read_memo"
                            "(%r != %r)" % (block_read_memo, read_memo))
                    self.batch_memos[read_memo] = block
                    pending_memos.pop()
                else:
                    block = self.batch_memos[read_memo]
                self.manager = _LazyGroupContentManager(block,
                    get_compressor_settings=self._get_compressor_settings)
                self.last_read_memo = read_memo
            start, end = index_memo[3:5]
            self.manager.add_factory(key, parents, start, end)
        if full_flush:
            for factory in self._flush_manager():
                yield factory
        # Reset the per-batch state.
        del self.keys[:]
        self.batch_memos.clear()
        del self.memos_to_get[:]
        self.total_bytes = 0

1191

1192

1193

class GroupCompressVersionedFiles(VersionedFilesWithFallbacks):
    """A group-compress based VersionedFiles implementation."""

    # This controls how the GroupCompress DeltaIndex works. Basically, we
    # compute hash pointers into the source blocks (so hash(text) => text).
    # However each of these references costs some memory in trade against a
    # more accurate match result. For very large files, they either are
    # pre-compressed and change in bulk whenever they change, or change in just
    # local blocks. Either way, 'improved resolution' is not very helpful,
    # versus running out of memory trying to track everything. The default max
    # gives 100% sampling of a 1MB file.
    _DEFAULT_MAX_BYTES_TO_INDEX = 1024 * 1024
    # Settings dict using the default above, keyed by 'max_bytes_to_index'.
    _DEFAULT_COMPRESSOR_SETTINGS = {'max_bytes_to_index':
                                    _DEFAULT_MAX_BYTES_TO_INDEX}

1207

1208

def __init__(self, index, access, delta=True, _unadded_refs=None,
             _group_cache=None):
    """Create a GroupCompressVersionedFiles object.

    :param index: The index object storing access and graph data.
    :param access: The access object storing raw data.
    :param delta: Whether to delta compress or just entropy compress.
    :param _unadded_refs: private parameter, don't use.
    :param _group_cache: private parameter, don't use.
    """
    # Create fresh private state for whatever the caller did not hand in.
    if _unadded_refs is None:
        _unadded_refs = {}
    if _group_cache is None:
        _group_cache = LRUSizeCache(max_size=50*1024*1024)
    self._index = index
    self._access = access
    self._delta = delta
    self._unadded_refs = _unadded_refs
    self._group_cache = _group_cache
    self._immediate_fallback_vfs = []
    # Filled in lazily by _get_compressor_settings.
    self._max_bytes_to_index = None

1229

1230

def without_fallbacks(self):
    """Return a clone of this object that has no fallbacks configured.

    The clone is given the same index, access object, delta flag and group
    cache, and a copy of the unadded refs.
    """
    unadded_refs_copy = dict(self._unadded_refs)
    return GroupCompressVersionedFiles(
        self._index, self._access, self._delta,
        _unadded_refs=unadded_refs_copy,
        _group_cache=self._group_cache)

1235

1236

def add_lines(self, key, parents, lines, parent_texts=None,
              left_matching_blocks=None, nostore_sha=None, random_id=False,
              check_content=True):
    """Add a text to the store.

    :param key: The key tuple of the text to add.
    :param parents: The parents key tuples of the text to add. None is
        treated as an empty tuple.
    :param lines: A list of lines. Each line must be a bytestring. All of
        them except the last must end with a newline and contain no other
        newline. The last line may either contain no newline or a single
        terminating one. If the lines list does not meet this constraint
        the add routine may error or may succeed - but you will be unable
        to read the data back accurately. (Checking the lines have been
        split correctly is expensive and extremely unlikely to catch bugs
        so it is not done at runtime unless check_content is True.)
    :param parent_texts: An optional dictionary containing the opaque
        representations of some or all of the parents of version_id to
        allow delta optimisations. This method does not read it.
    :param left_matching_blocks: a hint about which areas are common
        between the text and its left-hand-parent, in the
        SequenceMatcher.get_matching_blocks format. This method does not
        read it.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :param check_content: If True, the lines supplied are verified to be
        bytestrings that are correctly formed lines.
    :return: A tuple of the text sha1, the number of bytes in the text,
        and None.
    """
    self._index._check_write_ok()
    self._check_add(key, lines, random_id, check_content)
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_length = sum(len(line) for line in lines)
    record = ChunkedContentFactory(key, parents, None, lines)
    inserted_sha1s = list(self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha))
    return inserted_sha1s[0], text_length, None

1285

1286

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """See VersionedFiles._add_text().

    :param text: The whole text; its class must be exactly str, otherwise
        BzrBadParameterUnicode is raised.
    :return: A tuple of the text sha1, len(text), and None.
    """
    self._index._check_write_ok()
    self._check_add(key, None, random_id, check_content=False)
    if text.__class__ is not str:
        raise errors.BzrBadParameterUnicode("text")
    if parents is None:
        # The caller might pass None if there is no graph data, but kndx
        # indexes can't directly store that, so we give them
        # an empty tuple instead.
        parents = ()
    # double handling for now. Make it work until then.
    text_length = len(text)
    record = FulltextContentFactory(key, parents, None, text)
    inserted_sha1s = list(self._insert_record_stream(
        [record], random_id=random_id, nostore_sha=nostore_sha))
    return inserted_sha1s[0], text_length, None

1303

1304

def add_fallback_versioned_files(self, a_versioned_files):
    """Register a further source of texts that are not present here.

    :param a_versioned_files: A VersionedFiles object. It is appended to
        the list of immediate fallbacks.
    """
    fallbacks = self._immediate_fallback_vfs
    fallbacks.append(a_versioned_files)

1310

1311

def annotate(self, key):
    """See VersionedFiles.annotate.

    Builds a new annotate.Annotator over this object and returns its flat
    annotation of key.
    """
    return annotate.Annotator(self).annotate_flat(key)

1315

1316

def get_annotator(self):
    """Return a new annotate.Annotator built over this object."""
    annotator = annotate.Annotator(self)
    return annotator

1318

1319

def check(self, progress_bar=None, keys=None):
    """See VersionedFiles.check().

    :param progress_bar: Not used by this implementation.
    :param keys: When None, every key of this object is extracted as a
        fulltext and nothing is returned. Otherwise the record stream for
        keys is returned without being consumed here.
    """
    if keys is not None:
        return self.get_record_stream(keys, 'unordered', True)
    all_keys = self.keys()
    for record in self.get_record_stream(all_keys, 'unordered', True):
        record.get_bytes_as('fulltext')

1327

1328

def clear_cache(self):
    """See VersionedFiles.clear_cache()

    Clears the group cache, then the graph index cache and the int cache
    held by the index.
    """
    self._group_cache.clear()
    index = self._index
    index._graph_index.clear_cache()
    index._int_cache.clear()

1333

1334

def _check_add(self, key, lines, random_id, check_content):
    """Check that version_id and lines are safe to add.

    :param key: The key tuple; its last element is the version id.
    :param lines: The lines to verify when check_content is true.
    :param random_id: Not consulted here.
    :param check_content: When true, lines are checked for not being
        unicode and for being correctly formed lines.
    :raises InvalidRevisionId: if the version id contains whitespace.
    """
    version_id = key[-1]
    has_whitespace = (version_id is not None
                      and osutils.contains_whitespace(version_id))
    if has_whitespace:
        raise errors.InvalidRevisionId(version_id, self)
    self.check_not_reserved_id(version_id)
    # TODO: If random_id==False and the key is already present, we should
    # probably check that the existing content is identical to what is
    # being inserted, and otherwise raise an exception.  This would make
    # the bundle code simpler.
    if not check_content:
        return
    self._check_lines_not_unicode(lines)
    self._check_lines_are_lines(lines)

1348

1349

def get_parent_map(self, keys):
    """Get a map of the graph parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the mapping.
    """
    map_and_sources = self._get_parent_map_with_sources(keys)
    # Only the combined mapping is wanted, not the per-source results.
    return map_and_sources[0]

1357

1358

def _get_parent_map_with_sources(self, keys):
    """Get a map of the parents of keys, plus where each was found.

    :param keys: The keys to look up parents for.
    :return: A tuple. The first element is a mapping from keys to parents.
        Absent keys are absent from the mapping. The second element is a
        list of the per-source results: first the result from this
        object's index, then from the first fallback source, and so on.
        Sources are no longer queried once every key has been found.
    """
    combined = {}
    per_source = []
    still_missing = set(keys)
    for source in [self._index] + self._immediate_fallback_vfs:
        if not still_missing:
            break
        found = source.get_parent_map(still_missing)
        per_source.append(found)
        combined.update(found)
        still_missing -= set(found)
    return combined, per_source

1380

1381

def _get_blocks(self, read_memos):
    """Get GroupCompressBlocks for the given read_memos.

    Blocks found in self._group_cache are used directly. The others are
    read through self._access, each distinct memo only once, and are put
    into the cache as they are parsed.

    :returns: a series of (read_memo, block) pairs, in the order they were
        originally passed.
    """
    # First pass: pick up whatever the group cache already holds.
    known = {}
    for read_memo in read_memos:
        try:
            known[read_memo] = self._group_cache[read_memo]
        except KeyError:
            pass
    # Second pass: the memos to read, skipping what we already have and
    # never asking for the same data twice.
    to_read = []
    queued = set()
    for read_memo in read_memos:
        if read_memo in known or read_memo in queued:
            continue
        to_read.append(read_memo)
        queued.add(read_memo)
    raw_records = self._access.get_raw_records(to_read)
    # Third pass: hand the blocks out in the requested order.
    for read_memo in read_memos:
        try:
            block = known[read_memo]
        except KeyError:
            # Read the block, and cache it.
            zdata = next(raw_records)
            block = GroupCompressBlock.from_bytes(zdata)
            self._group_cache[read_memo] = block
            known[read_memo] = block
        yield read_memo, block

1417

1418

def get_missing_compression_parent_keys(self):
    """Return the keys of missing compression parents.

    Missing compression parents occur when a record stream was missing
    basis texts, or an index was scanned that had missing basis texts.

    :return: Always an empty frozenset.
    """
    # GroupCompress cannot currently reference texts that are not in the
    # group, so this is valid for now
    no_missing_parents = frozenset()
    return no_missing_parents

1427

1428

def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include. Any iterable is accepted, including
        a one-shot generator.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children. 'topological' and 'groupcompress' are downgraded to
        'unordered' when the index has no graph.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # keys might be a generator: materialise it exactly once and build the
    # set from that list. Iterating 'keys' a second time would find a
    # generator already exhausted and silently produce no records.
    orig_keys = list(keys)
    keys = set(orig_keys)
    if not keys:
        return
    if (not self._index.has_graph
        and ordering in ('topological', 'groupcompress')):
        # Cannot topological order when no graph has been stored.
        # but we allow 'as-requested' or 'unordered'
        ordering = 'unordered'

    remaining_keys = keys
    while True:
        try:
            # Work on a copy, so that remaining_keys only ever loses keys
            # that were actually yielded and a retry asks for the rest.
            keys = set(remaining_keys)
            for content_factory in self._get_remaining_record_stream(
                    keys, orig_keys, ordering, include_delta_closure):
                remaining_keys.discard(content_factory.key)
                yield content_factory
            return
        except errors.RetryWithNewPacks as e:
            # Reload (or re-raise), then loop to fetch what is left.
            self._access.reload_or_raise(e)

1462

1463

def _find_from_fallback(self, missing):
    """Find whatever keys you can from the fallbacks.

    The fallbacks are asked in order, and no further fallback is asked once
    nothing is missing any more.

    :param missing: A set of missing keys. This set will be mutated as keys
        are found from a fallback_vfs
    :return: (parent_map, key_to_source_map, source_results)
        parent_map  the overall key => parent_keys
        key_to_source_map   a dict from {key: source}
        source_results      a list of (source, keys) tuples
    """
    parent_map = {}
    key_to_source_map = {}
    source_results = []
    for fallback in self._immediate_fallback_vfs:
        if not missing:
            break
        found_parents = fallback.get_parent_map(missing)
        parent_map.update(found_parents)
        found_keys = list(found_parents)
        source_results.append((fallback, found_keys))
        for key in found_keys:
            key_to_source_map[key] = fallback
        missing.difference_update(found_keys)
    return parent_map, key_to_source_map, source_results

1486

1487

def _get_ordered_source_keys(self, ordering, parent_map, key_to_source_map):
    """Get the (source, [keys]) list.

    The returned objects should be in the order defined by 'ordering',
    which can weave between different sources.

    :param ordering: Must be one of 'topological' or 'groupcompress'
    :param parent_map: Mapping of key to parent keys; it is what gets
        sorted.
    :param key_to_source_map: Mapping of key to the source holding it.
        Keys that are not in it are attributed to self.
    :return: List of [(source, [keys])] tuples, such that all keys are in
        the defined order, regardless of source.
    """
    # Anything but 'topological' is handled as 'groupcompress'.
    # XXX: This only optimizes for the target ordering. We may need
    #      to balance that with the time it takes to extract
    #      ordering, by somehow grouping based on
    #      locations[key][0:3]
    sorter = tsort.topo_sort if ordering == 'topological' else sort_gc_optimal
    present_keys = sorter(parent_map)
    # Now group by source: consecutive keys from one source share an entry.
    source_keys = []
    previous_source = None
    for key in present_keys:
        key_source = key_to_source_map.get(key, self)
        if key_source is not previous_source:
            source_keys.append((key_source, []))
            previous_source = key_source
        source_keys[-1][1].append(key)
    return source_keys

1516

1517

def _get_as_requested_source_keys(self, orig_keys, locations, unadded_keys,
                                  key_to_source_map):
    """Group orig_keys by source while keeping the requested order.

    :param orig_keys: The keys, in the order they were requested.
    :param locations: Keys in here are attributed to self.
    :param unadded_keys: Keys in here are attributed to self as well.
    :param key_to_source_map: Mapping of key to source for the other keys;
        keys found nowhere are absent and left out.
    :return: A list of (source, [keys]) tuples; consecutive keys that come
        from the same source share one entry.
    """
    source_keys = []
    previous_source = None
    for key in orig_keys:
        if key not in locations and key not in unadded_keys:
            if key not in key_to_source_map:
                # absent
                continue
            key_source = key_to_source_map[key]
        else:
            key_source = self
        if key_source is not previous_source:
            source_keys.append((key_source, []))
            previous_source = key_source
        source_keys[-1][1].append(key)
    return source_keys

1533

1534

def _get_io_ordered_source_keys(self, locations, unadded_keys,
                                source_result):
    """Build the (source, [keys]) list for a read-friendly ordering.

    :param locations: Mapping of key to details; the first item of the
        details is what the keys are sorted by.
    :param unadded_keys: Keys held by the in-memory object.
    :param source_result: (source, keys) entries from other sources, which
        are appended after the entry for self.
    :return: A list of (source, [keys]) tuples, the first being for self.
    """
    # Sort by the group the bytes are stored in, followed by the location
    # in the group.
    stored_keys = sorted(locations, key=lambda key: locations[key][0])
    # We don't have an ordering for keys in the in-memory object, but
    # lets process the in-memory ones first.
    own_keys = list(unadded_keys) + stored_keys
    # Now grab all of the ones from other sources
    return [(self, own_keys)] + list(source_result)

1548

1549

def _get_remaining_record_stream(self, keys, orig_keys, ordering,
                                 include_delta_closure):
    """Get a stream of records for keys.

    Keys found neither in the index, nor among the unadded refs, nor in any
    fallback are yielded first, as AbsentContentFactory objects.

    :param keys: The keys to include.
    :param orig_keys: The keys in the order the caller gave them; only
        consulted for the 'as-requested' ordering.
    :param ordering: one of 'unordered', 'topological', 'groupcompress' or
        'as-requested'
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    # Cheap: iterate
    locations = self._index.get_build_details(keys)
    unadded_keys = set(self._unadded_refs).intersection(keys)
    missing = keys.difference(locations)
    missing.difference_update(unadded_keys)
    # _find_from_fallback removes from 'missing' whatever a fallback has,
    # so afterwards it only holds the keys that are absent everywhere.
    (fallback_parent_map, key_to_source_map,
     source_result) = self._find_from_fallback(missing)
    if ordering in ('topological', 'groupcompress'):
        # would be better to not globally sort initially but instead
        # start with one key, recurse to its oldest parent, then grab
        # everything in the same group, etc.
        parent_map = dict((key, details[2]) for key, details in
                          locations.iteritems())
        for key in unadded_keys:
            parent_map[key] = self._unadded_refs[key]
        parent_map.update(fallback_parent_map)
        source_keys = self._get_ordered_source_keys(ordering, parent_map,
                                                    key_to_source_map)
    elif ordering == 'as-requested':
        source_keys = self._get_as_requested_source_keys(orig_keys,
            locations, unadded_keys, key_to_source_map)
    else:
        # We want to yield the keys in a semi-optimal (read-wise) ordering.
        # Otherwise we thrash the _group_cache and destroy performance
        source_keys = self._get_io_ordered_source_keys(locations,
            unadded_keys, source_result)
    for key in missing:
        yield AbsentContentFactory(key)
    # Batch up as many keys as we can until either:
    #  - we encounter an unadded ref, or
    #  - we run out of keys, or
    #  - the total bytes to retrieve for this batch > BATCH_SIZE
    batcher = _BatchingBlockFetcher(self, locations,
        get_compressor_settings=self._get_compressor_settings)
    for source, keys in source_keys:
        if source is self:
            for key in keys:
                if key in self._unadded_refs:
                    # Flush batch, then yield unadded ref from
                    # self._compressor.
                    for factory in batcher.yield_factories(full_flush=True):
                        yield factory
                    bytes, sha1 = self._compressor.extract(key)
                    parents = self._unadded_refs[key]
                    yield FulltextContentFactory(key, parents, sha1, bytes)
                    continue
                if batcher.add_key(key) > BATCH_SIZE:
                    # Ok, this batch is big enough.  Yield some results.
                    for factory in batcher.yield_factories():
                        yield factory
        else:
            # The keys come from another source: flush everything batched
            # so far first, so that the overall order is kept, then pass
            # the source's own records through.
            for factory in batcher.yield_factories(full_flush=True):
                yield factory
            for record in source.get_record_stream(keys, ordering,
                                                   include_delta_closure):
                yield record
    # Whatever is still batched at the end.
    for factory in batcher.yield_factories(full_flush=True):
        yield factory

1619

1620

def get_sha1s(self, keys):
    """See VersionedFiles.get_sha1s().

    :param keys: The keys to get sha1s for.
    :return: A dict of key to sha1. The sha1 stored on a record is used
        when it is set; otherwise it is computed from the fulltext. Keys
        whose record is 'absent' and has no sha1 are left out.
    """
    result = {}
    for record in self.get_record_stream(keys, 'unordered', True):
        # Identity test: None is the only "no sha1" marker, and this does
        # not depend on how the sha1 object implements comparison.
        if record.sha1 is not None:
            result[record.key] = record.sha1
        elif record.storage_kind != 'absent':
            result[record.key] = osutils.sha_string(
                record.get_bytes_as('fulltext'))
    return result

1631

1632

def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFiles.get_record_stream:
    """
    # XXX: Setting random_id=True makes
    # test_insert_record_stream_existing_keys fail for groupcompress and
    # groupcompress-nograph, this needs to be revisited while addressing
    # 'bzr branch' performance issues.
    inserted_sha1s = self._insert_record_stream(stream, random_id=False)
    # The sha1s are of no interest here; the iterator only has to be
    # consumed to the end.
    for _sha1 in inserted_sha1s:
        pass

1645

1646

def _get_compressor_settings(self):
    """Return the settings dict for group compressors.

    On first use the 'bzr.groupcompress.max_bytes_to_index' user option is
    read from the global config. A value that is not an integer is warned
    about and ignored. Without a usable value,
    _DEFAULT_MAX_BYTES_TO_INDEX is used. The outcome is remembered in
    self._max_bytes_to_index for later calls.

    :return: A dict with the single key 'max_bytes_to_index'.
    """
    if self._max_bytes_to_index is not None:
        return {'max_bytes_to_index': self._max_bytes_to_index}
    # TODO: VersionedFiles don't know about their containing
    #       repository, so they don't have much of an idea about their
    #       location. So for now, this is only a global option.
    configured = config.GlobalConfig().get_user_option(
        'bzr.groupcompress.max_bytes_to_index')
    max_bytes = None
    if configured is not None:
        try:
            max_bytes = int(configured)
        except ValueError:
            trace.warning('Value for '
                          '"bzr.groupcompress.max_bytes_to_index"'
                          ' %r is not an integer'
                          % (configured,))
    if max_bytes is None:
        max_bytes = self._DEFAULT_MAX_BYTES_TO_INDEX
    self._max_bytes_to_index = max_bytes
    return {'max_bytes_to_index': self._max_bytes_to_index}

1666

1667

def _make_group_compressor(self):
    """Return a new GroupCompressor built with the compressor settings."""
    settings = self._get_compressor_settings()
    return GroupCompressor(settings)

1669

1670

def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
                          reuse_blocks=True):
    """Internal core to insert a record stream into this container.

    This helper function has a different interface than insert_record_stream
    to allow add_lines to be minimal, but still return the needed data.

    This is a generator: nothing is inserted until it is iterated, and the
    final group is only flushed when it is run to the end.

    :param stream: A stream of records to insert.
    :param random_id: Passed on to self._index.add_records. When true, a
        key seen a second time within the stream is noted and skipped.
    :param nostore_sha: If the sha1 of a given text matches nostore_sha,
        raise ExistingContent, rather than committing the new text.
    :param reuse_blocks: If the source is streaming from
        groupcompress-blocks, just insert the blocks as-is, rather than
        expanding the texts and inserting again.
    :return: An iterator over the sha1 of the inserted records. No sha1 is
        yielded for records that are covered by a re-used block.
    :raises RevisionNotPresent: if a record in the stream is 'absent'.
    :seealso insert_record_stream:
    :seealso add_lines:
    """
    adapters = {}
    def get_adapter(adapter_key):
        # Adapters are created on first use and then kept in 'adapters'.
        try:
            return adapters[adapter_key]
        except KeyError:
            adapter_factory = adapter_registry.get(adapter_key)
            adapter = adapter_factory(self)
            adapters[adapter_key] = adapter
            return adapter
    # This will go up to fulltexts for gc to gc fetching, which isn't
    # ideal.
    self._compressor = self._make_group_compressor()
    self._unadded_refs = {}
    keys_to_add = []
    def flush():
        # Write out the group built so far, index every key in it, and
        # start over with a fresh compressor.
        bytes_len, chunks = self._compressor.flush().to_chunks()
        self._compressor = self._make_group_compressor()
        # Note: At this point we still have 1 copy of the fulltext (in
        #       record and the var 'bytes'), and this generates 2 copies of
        #       the compressed text (one for bytes, one in chunks)
        # TODO: Push 'chunks' down into the _access api, so that we don't
        #       have to double compressed memory here
        # TODO: Figure out how to indicate that we would be happy to free
        #       the fulltext content at this point. Note that sometimes we
        #       will want it later (streaming CHK pages), but most of the
        #       time we won't (everything else)
        bytes = ''.join(chunks)
        del chunks
        index, start, length = self._access.add_raw_records(
            [(None, len(bytes))], bytes)[0]
        nodes = []
        for key, reads, refs in keys_to_add:
            # Index value: where the group is, then the range in the group.
            nodes.append((key, "%d %d %s" % (start, length, reads), refs))
        self._index.add_records(nodes, random_id=random_id)
        self._unadded_refs = {}
        del keys_to_add[:]

    last_prefix = None
    max_fulltext_len = 0
    max_fulltext_prefix = None
    insert_manager = None
    block_start = None
    block_length = None
    # XXX: TODO: remove this, it is just for safety checking for now
    inserted_keys = set()
    reuse_this_block = reuse_blocks
    for record in stream:
        # Raise an error when a record is missing.
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self)
        if random_id:
            if record.key in inserted_keys:
                trace.note(gettext('Insert claimed random_id=True,'
                           ' but then inserted %r two times'), record.key)
                continue
            inserted_keys.add(record.key)
        if reuse_blocks:
            # If the reuse_blocks flag is set, check to see if we can just
            # copy a groupcompress block as-is.
            # We only check on the first record (groupcompress-block) not
            # on all of the (groupcompress-block-ref) entries.
            # The reuse_this_block flag is then kept for as long as the
            # records that follow are not 'groupcompress-block' records;
            # it is only re-evaluated at the next 'groupcompress-block'.
            if record.storage_kind == 'groupcompress-block':
                # Check to see if we really want to re-use this block
                insert_manager = record._manager
                reuse_this_block = insert_manager.check_is_well_utilized()
        else:
            reuse_this_block = False
        if reuse_this_block:
            # We still want to reuse this block
            if record.storage_kind == 'groupcompress-block':
                # Insert the raw block into the target repo
                insert_manager = record._manager
                bytes = record._manager._block.to_bytes()
                _, start, length = self._access.add_raw_records(
                    [(None, len(bytes))], bytes)[0]
                del bytes
                block_start = start
                block_length = length
            if record.storage_kind in ('groupcompress-block',
                                       'groupcompress-block-ref'):
                if insert_manager is None:
                    raise AssertionError('No insert_manager set')
                if insert_manager is not record._manager:
                    raise AssertionError('insert_manager does not match'
                        ' the current record, we cannot be positive'
                        ' that the appropriate content was inserted.'
                        )
                value = "%d %d %d %d" % (block_start, block_length,
                                         record._start, record._end)
                nodes = [(record.key, value, (record.parents,))]
                # TODO: Consider buffering up many nodes to be added, not
                #       sure how much overhead this has, but we're seeing
                #       ~23s / 120s in add_records calls
                self._index.add_records(nodes, random_id=random_id)
                continue
        # From here on the record is recompressed from its fulltext.
        try:
            bytes = record.get_bytes_as('fulltext')
        except errors.UnavailableRepresentation:
            adapter_key = record.storage_kind, 'fulltext'
            adapter = get_adapter(adapter_key)
            bytes = adapter.get_bytes(record)
        if len(record.key) > 1:
            prefix = record.key[0]
            soft = (prefix == last_prefix)
        else:
            prefix = None
            soft = False
        if max_fulltext_len < len(bytes):
            max_fulltext_len = len(bytes)
            max_fulltext_prefix = prefix
        (found_sha1, start_point, end_point,
         type) = self._compressor.compress(record.key,
                                           bytes, record.sha1, soft=soft,
                                           nostore_sha=nostore_sha)
        # delta_ratio = float(len(bytes)) / (end_point - start_point)
        # Check if we want to continue to include that text
        if (prefix == max_fulltext_prefix
            and end_point < 2 * max_fulltext_len):
            # As long as we are on the same file_id, we will fill at least
            # 2 * max_fulltext_len
            start_new_block = False
        elif end_point > 4*1024*1024:
            start_new_block = True
        elif (prefix is not None and prefix != last_prefix
              and end_point > 2*1024*1024):
            start_new_block = True
        else:
            start_new_block = False
        last_prefix = prefix
        if start_new_block:
            # Take the text back out of the current group, write that group
            # out, and compress the text again as the first one of a new
            # group.
            self._compressor.pop_last()
            flush()
            max_fulltext_len = len(bytes)
            (found_sha1, start_point, end_point,
             type) = self._compressor.compress(record.key, bytes,
                                               record.sha1)
        if record.key[-1] is None:
            # No version id was given: derive one from the content sha1.
            key = record.key[:-1] + ('sha1:' + found_sha1,)
        else:
            key = record.key
        self._unadded_refs[key] = record.parents
        yield found_sha1
        as_st = static_tuple.StaticTuple.from_sequence
        if record.parents is not None:
            parents = as_st([as_st(p) for p in record.parents])
        else:
            parents = None
        refs = static_tuple.StaticTuple(parents)
        keys_to_add.append((key, '%d %d' % (start_point, end_point), refs))
    if len(keys_to_add):
        flush()
    self._compressor = None

1840

1841

def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Walk the full text of each requested key, one line at a time.

    Lines belonging to keys other than the requested ones may be returned.
    Every yielded item is a (line, key) pair where key is a text version
    the line is present in; it is not necessarily the version that
    introduced the line.

    Results come back in whatever order suits the underlying storage
    format, so callers must not rely on any ordering.

    When a progress bar is given it is updated as records are visited.
    Because this is an iterator, cleaning up the progress bar is left to
    the caller.

    NOTES:
     * Lines are normalised by the underlying store: they will all have
       newline terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose texts should be walked.
    :param pb: Optional progress bar.
    :raises errors.RevisionNotPresent: if a requested record is absent.
    :return: An iterator over (line, key).
    """
    wanted = set(keys)
    total = len(wanted)
    # Inclusions are the caller's concern; we only need to visit every
    # record once.
    # XXX: todo - optimise to use less than full texts.
    stream = self.get_record_stream(wanted, 'unordered', True)
    for position, record in enumerate(stream):
        record_key = record.key
        if pb is not None:
            pb.update('Walking content', position, total)
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record_key, self)
        fulltext = record.get_bytes_as('fulltext')
        for text_line in osutils.split_lines(fulltext):
            yield text_line, record_key
    # Report completion once the whole stream has been consumed.
    if pb is not None:
        pb.update('Walking content', total, total)

1880

1881

def keys(self):
    """See VersionedFiles.keys.

    Returns the union, as a set, of the keys reported by this object's
    own index and by each of its immediate fallback versioned files.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(2, "keys scales with size of history")
    all_keys = set()
    # The index is consulted first, then every immediate fallback.
    for key_source in [self._index] + self._immediate_fallback_vfs:
        all_keys.update(key_source.keys())
    return all_keys

1890

1891

1892

class _GCBuildDetails(object):
    """Compact record of the build details for a single text.

    Only the minimal data is kept per instance. Indexing and len() are
    provided so that instances remain compatible with the old api, which
    used a 4-item tuple, without taking as much memory.
    """

    __slots__ = ('_index', '_group_start', '_group_end', '_basis_end',
                 '_delta_end', '_parents')

    # Shared by every instance, so kept on the class rather than in a slot.
    method = 'group'
    compression_parent = None

    def __init__(self, parents, position_info):
        """Remember parents and unpack the five-item position_info."""
        self._parents = parents
        index, group_start, group_end, basis_end, delta_end = position_info
        self._index = index
        self._group_start = group_start
        self._group_end = group_end
        self._basis_end = basis_end
        self._delta_end = delta_end

    def __repr__(self):
        class_name = type(self).__name__
        return '%s(%s, %s)' % (class_name, self.index_memo, self._parents)

    @property
    def index_memo(self):
        """The position fields, in the order they were supplied."""
        memo = (self._index, self._group_start, self._group_end,
                self._basis_end, self._delta_end)
        return memo

    @property
    def record_details(self):
        """A StaticTuple of (method, None)."""
        return static_tuple.StaticTuple(self.method, None)

    def __getitem__(self, offset):
        """Compatibility thunk to act like a tuple.

        Offsets 0 through 3 map to index_memo, compression_parent, parents
        and record_details; anything else raises IndexError.
        """
        if offset == 0:
            return self.index_memo
        if offset == 1:
            # Always None
            return self.compression_parent
        if offset == 2:
            return self._parents
        if offset == 3:
            return self.record_details
        raise IndexError('offset out of range')

    def __len__(self):
        """Length of the emulated tuple."""
        return 4

1938

1939

1940

class _GCGraphIndex(object):
    """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

    def __init__(self, graph_index, is_locked, parents=True,
                 add_callback=None, track_external_parent_refs=False,
                 inconsistency_fatal=True, track_new_keys=False):
        """Construct a _GCGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback, returns True if the index is locked and
            thus usable.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param track_external_parent_refs: As keys are added, keep track of the
            keys they reference, so that we can query get_missing_parents(),
            etc.
        :param inconsistency_fatal: When asked to add records that are already
            present, and the details are inconsistent with the existing
            record, raise an exception instead of warning (and skipping the
            record).
        :param track_new_keys: Passed through to _KeyRefs; only used when
            track_external_parent_refs is True.
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._parents = parents
        self.has_graph = parents
        self._is_locked = is_locked
        self._inconsistency_fatal = inconsistency_fatal
        # GroupCompress records tend to have the same 'group' start + offset
        # repeated over and over, this creates a surplus of ints
        self._int_cache = {}
        if track_external_parent_refs:
            self._key_dependencies = _KeyRefs(
                track_new_keys=track_new_keys)
        else:
            self._key_dependencies = None

    def add_records(self, records, random_id=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
            (key, value, refs).
            When parents are recorded, refs[0] is taken as the parents of
            key.
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        :raises errors.ReadOnlyError: if no add_callback was supplied.
        :raises errors.KnitCorrupt: if a record carries parents but this
            index does not record parents, or if a record is already
            present with different references and inconsistency_fatal is
            set.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        changed = False
        keys = {}
        for (key, value, refs) in records:
            if not self._parents:
                if refs:
                    for ref in refs:
                        if ref:
                            raise errors.KnitCorrupt(self,
                                "attempt to add node with parents "
                                "in parentless index.")
                    refs = ()
                    changed = True
            keys[key] = (value, refs)
        # check for dups
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                # Sometimes these are passed as a list rather than a tuple
                node_refs = static_tuple.as_tuples(node_refs)
                passed = static_tuple.as_tuples(keys[key])
                if node_refs != passed[1]:
                    details = '%s %s %s' % (key, (value, node_refs), passed)
                    if self._inconsistency_fatal:
                        raise errors.KnitCorrupt(self, "inconsistent details"
                                                 " in add_records: %s" %
                                                 details)
                    else:
                        trace.warning("inconsistent details in skipped"
                                      " record: %s", details)
                # Already present: do not hand it to the callback again.
                del keys[key]
                changed = True
        if changed:
            # Rebuild the record list from what survived the checks above.
            # A parentless index gets (key, value) pairs, without refs.
            if self._parents:
                records = [(key, value, node_refs)
                           for key, (value, node_refs) in keys.items()]
            else:
                records = [(key, value)
                           for key, (value, node_refs) in keys.items()]
        key_dependencies = self._key_dependencies
        if key_dependencies is not None:
            if self._parents:
                for key, value, refs in records:
                    parents = refs[0]
                    key_dependencies.add_references(key, parents)
            else:
                # Records may have been rewritten as (key, value) pairs
                # above, so rely only on the key being the first item.
                for record in records:
                    key_dependencies.add_key(record[0])
        self._add_callback(records)

    def _check_read(self):
        """Raise an exception if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Raise an exception if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        Note: Callers are responsible for checking that the index is locked
        before calling this method.

        This is a generator of 4-item nodes; for a parentless index the
        fourth item is always an empty tuple.

        :param keys: An iterable of index key tuples.
        :param check_present: If True, raise errors.RevisionNotPresent for
            one of the missing keys once all found entries were yielded.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise errors.RevisionNotPresent(missing_keys.pop(), self)

    def find_ancestry(self, keys):
        """See CombinedGraphIndex.find_ancestry"""
        return self._graph_index.find_ancestry(keys, 0)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping. When parents are not recorded every present key
            maps to None.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1]] = node[3][0]
        else:
            for node in nodes:
                result[node[1]] = None
        return result

    def get_missing_parents(self):
        """Return the keys of missing parents."""
        # Copied from _KnitGraphIndex.get_missing_parents
        # We may have false positives, so filter those out.
        self._key_dependencies.satisfy_refs_for_keys(
            self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))
        return frozenset(self._key_dependencies.get_unsatisfied_refs())

    def get_build_details(self, keys):
        """Get the various build details for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).

            * index_memo: opaque structure to pass to read_records to extract
              the raw data
            * compression_parent: Content that this record is built upon, may
              be None
            * parents: Logical parents of this node
            * record_details: extra information about the content which needs
              to be passed to Factory.parse_record
        """
        self._check_read()
        result = {}
        entries = self._get_entries(keys)
        for entry in entries:
            key = entry[1]
            if not self._parents:
                parents = None
            else:
                parents = entry[3][0]
            details = _GCBuildDetails(parents, self._node_to_position(entry))
            result[key] = details
        return result

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    def _node_to_position(self, node):
        """Convert an index value to position details.

        :param node: An index node whose third item is a space separated
            string of four integers.
        :return: (node[0], start, stop, basis_end, delta_end)
        """
        bits = node[2].split(' ')
        # It would be nice not to read the entire gzip.
        # start and stop are put into _int_cache because they are very common.
        # They define the 'group' that an entry is in, and many groups can have
        # thousands of objects.
        # Branching Launchpad, for example, saves ~600k integers, at 12 bytes
        # each, or about 7MB. Note that it might be even more when you consider
        # how PyInt is allocated in separate slabs. And you can't return a slab
        # to the OS if even 1 int on it is in use. Note though that Python uses
        # a LIFO when re-using PyInt slots, which might cause more
        # fragmentation.
        start = int(bits[0])
        start = self._int_cache.setdefault(start, start)
        stop = int(bits[1])
        stop = self._int_cache.setdefault(stop, stop)
        basis_end = int(bits[2])
        delta_end = int(bits[3])
        # We can't use StaticTuple here, because node[0] is a BTreeGraphIndex
        # instance...
        return (node[0], start, stop, basis_end, delta_end)

    def scan_unvalidated_index(self, graph_index):
        """Inform this _GCGraphIndex that there is an unvalidated index.

        The parent references of every node in graph_index are registered
        with the key dependency tracker. Nothing is done when external
        parent references are not being tracked.

        :param graph_index: A GraphIndex
        """
        key_dependencies = self._key_dependencies
        if key_dependencies is None:
            return
        for node in graph_index.iter_all_entries():
            # Add parent refs from graph_index (and discard parent refs
            # that the graph_index has).
            key_dependencies.add_references(node[1], node[3][0])

2189

2190

2191

from bzrlib._groupcompress_py import (

2192

apply_delta,

2193

apply_delta_to_source,

2194

encode_base128_int,

2195

decode_base128_int,

2196

decode_copy_instruction,

2197

LinesDeltaIndex,

2198

)

2199

try:

2200

from bzrlib._groupcompress_pyx import (

2201

apply_delta,

2202

apply_delta_to_source,

2203

DeltaIndex,

2204

encode_base128_int,

2205

decode_base128_int,

2206

)

2207

GroupCompressor = PyrexGroupCompressor

2208

except ImportError, e:

2209

osutils.failed_to_load_extension(e)

2210

GroupCompressor = PythonGroupCompressor

2211

Older »