~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

Committer: Martin Pool
Date: 2007-04-04 06:17:31 UTC
mto: This revision was merged to the branch mainline in revision 2397.
Revision ID: mbp@sourcefrog.net-20070404061731-tt2xrzllqhbodn83

Contents of TODO file moved into bug tracker

files added:
build-api

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_escaped_store.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/xml6.py

doc/README.1st

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_groupcompress_py.py

bzrlib/_groupcompress_pyx.pyx

bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/_patiencediff_c.c

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/api.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_pack.py

bzrlib/bisect_multi.py

bzrlib/branchbuilder.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/bundle/serializer/v4.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/chunk_writer.py

bzrlib/clean_tree.py

bzrlib/counted_lock.py

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/directory_service.py

bzrlib/email_message.py

bzrlib/fifo_cache.py

bzrlib/filters

bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics

bzrlib/help_topics/en

bzrlib/help_topics/en/authentication.txt

bzrlib/help_topics/en/conflicts.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory_delta.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/pack.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/rename_map.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/rules.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart

bzrlib/smart/__init__.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/packrepository.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/smtp_connection.py

bzrlib/switch.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_hooks.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_reference.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/branch_implementations/test_check.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_get_revision_id_to_revno_map.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_reconcile.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/fake_command.py

bzrlib/tests/file_utils.py

bzrlib/tests/ftp_server

bzrlib/tests/ftp_server/__init__.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/https_server.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_interbranch

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_repository/helpers.py

bzrlib/tests/per_repository/test__generate_text_key_index.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_check_reconcile.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_find_text_key_references.py

bzrlib/tests/per_repository/test_get_parent_map.py

bzrlib/tests/per_repository/test_has_revisions.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_is_write_locked.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_chk/test_unsupported.py

bzrlib/tests/per_repository_reference

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_all_revision_ids.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_repository_reference/test_default_stacking.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_bisect_multi.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_counted_lock.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http_implementations.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_patches_data/diff-7

bzrlib/tests/test_patches_data/mod-7

bzrlib/tests/test_patches_data/orig-7

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_rename_map.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_serializer.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_switch.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/transport_util.py

bzrlib/tests/tree_implementations/test_annotate_iter.py

bzrlib/tests/tree_implementations/test_get_root_id.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/workingtree_implementations/test_content_filters.py

bzrlib/tests/workingtree_implementations/test_eol_conversion.py

bzrlib/tests/workingtree_implementations/test_get_file_mtime.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/tests/workingtree_implementations/test_views.py

bzrlib/transport/brokenrename.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

bzrlib/transport/nosmart.py

bzrlib/transport/trace.py

bzrlib/transport/unlistable.py

bzrlib/util/simplemapi.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

bzrlib/version_info_formats/format_custom.py

bzrlib/views.py

bzrlib/xml5.py

bzrlib/xml6.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_access

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers

doc/developers/add.txt

doc/developers/annotate.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/btree_index_prefetch.txt

doc/developers/bundle-creation.txt

doc/developers/bundle-format4.txt

doc/developers/bundles.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/commit.txt

doc/developers/container-format.txt

doc/developers/cycle.txt

doc/developers/development-repo.txt

doc/developers/diff.txt

doc/developers/directory-fingerprints.txt

doc/developers/dirstate.txt

doc/developers/ec2.txt

doc/developers/gc.txt

doc/developers/groupcompress-design.txt

doc/developers/improved_chk_index.txt

doc/developers/incremental-push-pull.txt

doc/developers/index.txt

doc/developers/indices.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/last-modified.txt

doc/developers/lca-merge.txt

doc/developers/lca_tree_merging.txt

doc/developers/merge-scaling.txt

doc/developers/missing.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-roadmap.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/performance.dot

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/plugin-api.txt

doc/developers/ppa.txt

doc/developers/profiling.txt

doc/developers/releasing.txt

doc/developers/repository-stream.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/revision-properties.txt

doc/developers/status.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/uncommit.txt

doc/developers/update.txt

doc/en

doc/en/admin-guide

doc/en/admin-guide/index.txt

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf

doc/en/quick-reference/quick-start-summary.png

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/tutorials

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/annotating_changes.txt

doc/en/user-guide/bazaar_workflows.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/bug_trackers.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/central_intro.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/controlling_registration.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/distributed_intro.txt

doc/en/user-guide/entering_commands.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/images

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_centralized.svg

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_gatekeeper.svg

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_localcommit.svg

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_peer.svg

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_pqm.svg

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_shared.svg

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/images/workflows_single.svg

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/part2_intro.txt

doc/en/user-guide/partner_intro.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/recording_changes.txt

doc/en/user-guide/releasing_a_project.txt

doc/en/user-guide/resolving_conflicts.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/reviewing_changes.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/shelving_changes.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/stacked.txt

doc/en/user-guide/starting_a_project.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/working_offline_central.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-guide/zen.txt

doc/en/user-reference

doc/en/user-reference/readme.txt

doc/es

doc/es/guia-desarrollador

doc/es/guia-usuario

doc/es/guia-usuario/index.txt

doc/es/guia-usuario/resolving_conflicts.txt

doc/es/guia-usuario/version_info.txt

doc/es/mini-tutorial

doc/es/mini-tutorial/index.txt

doc/es/notas-version

doc/es/referencia

doc/es/referencia-rapida

doc/es/referencia-rapida/Makefile

doc/es/referencia-rapida/referencia-rapida.svg

doc/index.es.txt

doc/index.txt

doc/news-template.txt

man1

tools/bzr_epydoc

tools/bzr_epydoc_uid.py

tools/check-newsbugs.py

tools/package_mf.py

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/win32/build_release.py

tools/win32/run_script.py

files renamed:
doc/developers/HACKING.txt => HACKING

bzrlib/deprecated_graph.py => bzrlib/graph.py

bzrlib/help_topics/__init__.py => bzrlib/help_topics.py

bzrlib/_patiencediff_py.py => bzrlib/patiencediff.py

bzrlib/plugins/launchpad/lp_directory.py => bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_directory.py => bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/tests/http_utils.py => bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/http_server.py => bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_send.py => bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/tests/test_deprecated_graph.py => bzrlib/tests/test_graph.py

bzrlib/tests/test_revisionspec.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

bzrlib/transport/remote.py => bzrlib/transport/smart.py

bzrlib/xml8.py => bzrlib/xml5.py

doc/en/tutorials/centralized_workflow.txt => doc/centralized_workflow.txt

bzrlib/help_topics/en/configuration.txt => doc/configuration.txt

doc/en/user-guide/http_smart_server.txt => doc/http_smart_server.txt

doc/en/user-guide/index.txt => doc/index.txt

doc/en/user-guide/plugins.txt => doc/plugins.txt

doc/en/user-guide/server.txt => doc/server.txt

doc/en/user-guide/setting_up_email.txt => doc/setting_up_email.txt

doc/en/user-guide/specifying_revisions.txt => doc/specifying_revisions.txt

doc/en/tutorials/tutorial.txt => doc/tutorial.txt

doc/en/user-guide/using_aliases.txt => doc/using_aliases.txt

doc/en/user-guide/version_info.txt => doc/version_info.txt

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

bzr.ico

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/gpg.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/info.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/registry.py

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_create_checkout.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_lock/test_temporary_write_lock.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/version.py

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml7.py

bzrlib/xml_serializer.py

contrib/bash/bzr.simple

contrib/newinventory.py

contrib/pwclient.full

doc/bazaar-vcs.org.kid

doc/default.css

generate_docs.py

profile_imports.py

setup.py *

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/trace-revisions

tools/weavebench.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/dirstate.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""DirState objects record the state of a directory and its bzr metadata.

dirstate format = header line, full checksum, row count, parent details,

ghost_details, entries;

header line = "#bazaar dirstate flat format 3", NL;

header line = "#bazaar dirstate flat format 2", NL;

full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;

row count = "num_entries: ", WHOLE_NUMBER, NL;

row count = "num_entries: ", digit, NL;

parent_details = WHOLE NUMBER, {REVISION_ID}* NL;

ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entries = {entry};

'a' is an absent entry: In that tree the id is not present at this path.

'd' is a directory entry: This path in this tree is a directory with the

current file id. There is no fingerprint for directories.

'f' is a file entry: As for directory, but it's a file. The fingerprint is the

sha1 value of the file's canonical form, i.e. after any read filters have

been applied to the convenience form stored in the working tree.

'f' is a file entry: As for directory, but its a file. The fingerprint is a

sha1 value.

'l' is a symlink entry: As for directory, but a symlink. The fingerprint is the

link target.

't' is a reference to a nested subtree; the fingerprint is the referenced

100

101

100

--- Format 1 had the following different definition: ---

102

101

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,

103

WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,

102

WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,

104

103

{PARENT ROW}

105

104

PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,

106

105

basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,

119

118

where we need id->path mapping; we also usually read the whole file, so

120

119

I'm going to skip that for the moment, as we have the ability to locate

121

120

via bisect any path in any tree, and if we lookup things by path, we can

122

accumulate an id->path mapping as we go, which will tend to match what we

121

accumulate a id->path mapping as we go, which will tend to match what we

123

122

looked for.

124

123

125

124

I plan to implement this asap, so please speak up now to alter/tweak the

131

130

operations for the less common but still occurs a lot status/diff/commit

132

131

on specific files). Operations on specific files involve a scan for all

133

132

the children of a path, *in every involved tree*, which the current

134

format did not accommodate.

133

format did not accommodate.

135

134

----

136

135

137

136

Design priorities:

144

143

Locking:

145

144

Eventually reuse dirstate objects across locks IFF the dirstate file has not

146

145

been modified, but will require that we flush/ignore cached stat-hit data

147

because we won't want to restat all files on disk just because a lock was

146

because we wont want to restat all files on disk just because a lock was

148

147

acquired, yet we cannot trust the data after the previous lock was released.

149

148

150

149

Memory representation:

151

150

vector of all directories, and vector of the children?

152

i.e.

153

root_entrie = (direntry for root, [parent_direntries_for_root]),

151

i.e.

152

root_entrie = (direntry for root, [parent_direntries_for_root]),

154

153

dirblocks = [

155

154

('', ['data for achild', 'data for bchild', 'data for cchild'])

156

155

('dir', ['achild', 'cchild', 'echild'])

159

158

- in-order for serialisation - this is 'dirblock' grouping.

160

159

- insertion of a file '/a' affects only the '/' child-vector, that is, to

161

160

insert 10K elements from scratch does not generate O(N^2) memmoves of a

162

single vector, rather each individual, which tends to be limited to a

163

manageable number. Will scale badly on trees with 10K entries in a

161

single vector, rather each individual, which tends to be limited to a

162

manageable number. Will scale badly on trees with 10K entries in a

164

163

single directory. compare with Inventory.InventoryDirectory which has

165

164

a dictionary for the children. No bisect capability, can only probe for

166

exact matches, or grab all elements and sort.

167

- What's the risk of error here? Once we have the base format being processed

168

we should have a net win regardless of optimality. So we are going to

169

go with what seems reasonable.

165

exact matches, or grab all elements and sorta.

166

- Whats the risk of error here? Once we have the base format being processed

167

we should have a net win regardless of optimality. So we are going to

168

go with what seems reasonably.

170

169

open questions:

171

170

172

Maybe we should do a test profile of the core structure - 10K simulated

173

searches/lookups/etc?

171

maybe we should do a test profile of these core structure - 10K simulated searches/lookups/etc?

174

172

175

173

Objects for each row?

176

174

The lifetime of Dirstate objects is currently per lock, but see above for

187

185

the file on disk, and then immediately discard, the overhead of object creation

188

186

becomes a significant cost.

189

187

190

Figures: Creating a tuple from 3 elements was profiled at 0.0625

188

Figures: Creating a tuple from from 3 elements was profiled at 0.0625

191

189

microseconds, whereas creating an object which is subclassed from tuple was

192

190

0.500 microseconds, and creating an object with 3 elements and slots was 3

193

191

microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get

201

199

202

200

"""

203

201

202

203

import base64

204

import bisect

205

import binascii

206

205

import errno

207

206

import os

208

207

from stat import S_IEXEC

209

import stat

210

208

import struct

211

209

import sys

212

210

import time

213

211

import zlib

214

212

215

213

from bzrlib import (

216

cache_utf8,

217

debug,

218

214

errors,

219

215

inventory,

220

216

lock,

223

219

)

224

220

225

221

226

# This is the Windows equivalent of ENOTDIR

227

# It is defined in pywin32.winerror, but we don't want a strong dependency for

228

# just an error code.

229

ERROR_PATH_NOT_FOUND = 3

230

ERROR_DIRECTORY = 267

231

232

233

if not getattr(struct, '_compile', None):

234

# Cannot pre-compile the dirstate pack_stat

235

def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):

236

"""Convert stat values into a packed representation."""

237

return _encode(_pack('>LLLLLL', st.st_size, int(st.st_mtime),

238

int(st.st_ctime), st.st_dev, st.st_ino & 0xFFFFFFFF,

239

st.st_mode))[:-1]

240

else:

241

# compile the struct compiler we need, so as to only do it once

242

from _struct import Struct

243

_compiled_pack = Struct('>LLLLLL').pack

244

def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):

245

"""Convert stat values into a packed representation."""

246

# jam 20060614 it isn't really worth removing more entries if we

247

# are going to leave it in packed form.

248

# With only st_mtime and st_mode filesize is 5.5M and read time is 275ms

249

# With all entries, filesize is 5.9M and read time is maybe 280ms

250

# well within the noise margin

251

252

# base64 encoding always adds a final newline, so strip it off

253

# The current version

254

return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),

255

st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]

256

# This is 0.060s / 1.520s faster by not encoding as much information

257

# return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]

258

# This is not strictly faster than _encode(_pack())[:-1]

259

# return '%X.%X.%X.%X.%X.%X' % (

260

# st.st_size, int(st.st_mtime), int(st.st_ctime),

261

# st.st_dev, st.st_ino, st.st_mode)

262

# Similar to the _encode(_pack('>LL'))

263

# return '%X.%X' % (int(st.st_mtime), st.st_mode)

264

265

266

class SHA1Provider(object):

267

"""An interface for getting sha1s of a file."""

268

269

def sha1(self, abspath):

270

"""Return the sha1 of a file given its absolute path.

271

272

:param abspath: May be a filesystem encoded absolute path

273

or a unicode path.

274

"""

275

raise NotImplementedError(self.sha1)

276

277

def stat_and_sha1(self, abspath):

278

"""Return the stat and sha1 of a file given its absolute path.

279

280

:param abspath: May be a filesystem encoded absolute path

281

or a unicode path.

282

283

Note: the stat should be the stat of the physical file

284

while the sha may be the sha of its canonical content.

285

"""

286

raise NotImplementedError(self.stat_and_sha1)

287

288

289

class DefaultSHA1Provider(SHA1Provider):

290

"""A SHA1Provider that reads directly from the filesystem."""

291

292

def sha1(self, abspath):

293

"""Return the sha1 of a file given its absolute path."""

294

return osutils.sha_file_by_name(abspath)

295

296

def stat_and_sha1(self, abspath):

297

"""Return the stat and sha1 of a file given its absolute path."""

298

file_obj = file(abspath, 'rb')

299

try:

300

statvalue = os.fstat(file_obj.fileno())

301

sha1 = osutils.sha_file(file_obj)

302

finally:

303

file_obj.close()

304

return statvalue, sha1

222

class _Bisector(object):

223

"""This just keeps track of information as we are bisecting."""

305

224

306

225

307

226

class DirState(object):

309

228

310

229

A dirstate is a specialised data structure for managing local working

311

230

tree state information. It's not yet well defined whether it is platform

312

specific, and if it is how we detect/parameterize that.

231

specific, and if it is how we detect/parameterise that.

313

232

314

233

Dirstates use the usual lock_write, lock_read and unlock mechanisms.

315

234

Unlike most bzr disk formats, DirStates must be locked for reading, using

336

255

'r': 'relocated',

337

256

't': 'tree-reference',

338

257

}

339

_stat_to_minikind = {

340

stat.S_IFDIR:'d',

341

stat.S_IFREG:'f',

342

stat.S_IFLNK:'l',

343

}

344

258

_to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain

345

259

# of using int conversion rather than a dict here. AND BLAME ANDREW IF

346

260

# it is faster.

362

276

HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'

363

277

HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'

364

278

365

def __init__(self, path, sha1_provider):

279

def __init__(self, path):

366

280

"""Create a DirState object.

367

281

282

Attributes of note:

283

284

:attr _root_entrie: The root row of the directory/file information,

285

- contains the path to / - '', ''

286

- kind of 'directory',

287

- the file id of the root in utf8

288

- size of 0

289

- a packed state

290

- and no sha information.

368

291

:param path: The path at which the dirstate file on disk should live.

369

:param sha1_provider: an object meeting the SHA1Provider interface.

370

292

"""

371

293

# _header_state and _dirblock_state represent the current state

372

294

# of the dirstate metadata and the per-row data respectively.

374

296

# IN_MEMORY_UNMODIFIED indicates that what we have in memory

375

297

# is the same as is on disk

376

298

# IN_MEMORY_MODIFIED indicates that we have a modified version

377

# of what is on disk.

299

# of what is on disk.

378

300

# In future we will add more granularity, for instance _dirblock_state

379

301

# will probably support partially-in-memory as a separate variable,

380

302

# allowing for partially-in-memory unmodified and partially-in-memory

381

303

# modified states.

382

304

self._header_state = DirState.NOT_IN_MEMORY

383

305

self._dirblock_state = DirState.NOT_IN_MEMORY

384

# If true, an error has been detected while updating the dirstate, and

385

# for safety we're not going to commit to disk.

386

self._changes_aborted = False

387

306

self._dirblocks = []

388

307

self._ghosts = []

389

308

self._parents = []

392

311

self._lock_token = None

393

312

self._lock_state = None

394

313

self._id_index = None

395

# a map from packed_stat to sha's.

396

self._packed_stat_index = None

397

314

self._end_of_header = None

398

315

self._cutoff_time = None

399

316

self._split_path_cache = {}

400

317

self._bisect_page_size = DirState.BISECT_PAGE_SIZE

401

self._sha1_provider = sha1_provider

402

if 'hashcache' in debug.debug_flags:

403

self._sha1_file = self._sha1_file_and_mutter

404

else:

405

self._sha1_file = self._sha1_provider.sha1

406

# These two attributes provide a simple cache for lookups into the

407

# dirstate in-memory vectors. By probing respectively for the last

408

# block, and for the next entry, we save nearly 2 bisections per path

409

# during commit.

410

self._last_block_index = None

411

self._last_entry_index = None

412

318

413

319

def __repr__(self):

414

320

return "%s(%r)" % \

418

324

"""Add a path to be tracked.

419

325

420

326

:param path: The path within the dirstate - '' is the root, 'foo' is the

421

path foo within the root, 'foo/bar' is the path bar within foo

327

path foo within the root, 'foo/bar' is the path bar within foo

422

328

within the root.

423

329

:param file_id: The file id of the path being added.

424

:param kind: The kind of the path, as a string like 'file',

330

:param kind: The kind of the path, as a string like 'file',

425

331

'directory', etc.

426

332

:param stat: The output of os.lstat for the path.

427

:param fingerprint: The sha value of the file's canonical form (i.e.

428

after any read filters have been applied),

333

:param fingerprint: The sha value of the file,

429

334

or the target of a symlink,

430

335

or the referenced revision id for tree-references,

431

336

or '' for directories.

432

337

"""

433

338

# adding a file:

434

# find the block its in.

339

# find the block its in.

435

340

# find the location in the block.

436

341

# check it's not there

437

342

# add it.

438

#------- copied from inventory.ensure_normalized_name - keep synced.

343

#------- copied from inventory.make_entry

439

344

# --- normalized_filename wants a unicode basename only, so get one.

440

345

dirname, basename = osutils.split(path)

441

346

# we don't import normalized_filename directly because we want to be

450

355

# in the parent, or according to the special treatment for the root

451

356

if basename == '.' or basename == '..':

452

357

raise errors.InvalidEntryName(path)

453

# now that we've normalised, we need the correct utf8 path and

358

# now that we've normalised, we need the correct utf8 path and

454

359

# dirname and basename elements. This single encode and split should be

455

360

# faster than three separate encodes.

456

361

utf8path = (dirname + '/' + basename).strip('/').encode('utf8')

457

362

dirname, basename = osutils.split(utf8path)

458

# uses __class__ for speed; the check is needed for safety

459

if file_id.__class__ is not str:

460

raise AssertionError(

461

"must be a utf8 file_id not %s" % (type(file_id), ))

363

assert file_id.__class__ == str, \

364

"must be a utf8 file_id not %s" % (type(file_id))

462

365

# Make sure the file_id does not exist in this tree

463

rename_from = None

464

file_id_entry = self._get_entry(0, fileid_utf8=file_id, include_deleted=True)

366

file_id_entry = self._get_entry(0, fileid_utf8=file_id)

465

367

if file_id_entry != (None, None):

466

if file_id_entry[1][0][0] == 'a':

467

if file_id_entry[0] != (dirname, basename, file_id):

468

# set the old name's current operation to rename

469

self.update_minimal(file_id_entry[0],

470

'r',

471

path_utf8='',

472

packed_stat='',

473

fingerprint=utf8path

474

)

475

rename_from = file_id_entry[0][0:2]

476

else:

477

path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])

478

kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]

479

info = '%s:%s' % (kind, path)

480

raise errors.DuplicateFileId(file_id, info)

368

path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])

369

kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]

370

info = '%s:%s' % (kind, path)

371

raise errors.DuplicateFileId(file_id, info)

481

372

first_key = (dirname, basename, '')

482

373

block_index, present = self._find_block_index_from_key(first_key)

483

374

if present:

484

375

# check the path is not in the tree

485

376

block = self._dirblocks[block_index][1]

486

377

entry_index, _ = self._find_entry_index(first_key, block)

487

while (entry_index < len(block) and

378

while (entry_index < len(block) and

488

379

block[entry_index][0][0:2] == first_key[0:2]):

489

380

if block[entry_index][1][0][0] not in 'ar':

490

381

# this path is in the dirstate in the current tree.

510

401

packed_stat = pack_stat(stat)

511

402

parent_info = self._empty_parent_info()

512

403

minikind = DirState._kind_to_minikind[kind]

513

if rename_from is not None:

514

if rename_from[0]:

515

old_path_utf8 = '%s/%s' % rename_from

516

else:

517

old_path_utf8 = rename_from[1]

518

parent_info[0] = ('r', old_path_utf8, 0, False, '')

519

404

if kind == 'file':

520

405

entry_data = entry_key, [

521

406

(minikind, fingerprint, size, False, packed_stat),

538

423

if not present:

539

424

block.insert(entry_index, entry_data)

540

425

else:

541

if block[entry_index][1][0][0] != 'a':

542

raise AssertionError(" %r(%r) already added" % (basename, file_id))

426

assert block[entry_index][1][0][0] == 'a', " %r(%r) already added" % (basename, file_id)

543

427

block[entry_index][1][0] = entry_data[1][0]

544

428

545

429

if kind == 'directory':

549

433

if self._id_index:

550

434

self._id_index.setdefault(entry_key[2], set()).add(entry_key)

551

435

552

def _bisect(self, paths):

436

def _bisect(self, dir_name_list):

553

437

"""Bisect through the disk structure for specific rows.

554

438

555

:param paths: A list of paths to find

556

:return: A dict mapping path => entries for found entries. Missing

439

:param dir_name_list: A list of (dir, name) pairs.

440

:return: A dict mapping (dir, name) => entry for found entries. Missing

557

441

entries will not be in the map.

558

The list is not sorted, and entries will be populated

559

based on when they were read.

560

442

"""

561

443

self._requires_lock()

562

444

# We need the file pointer to be right after the initial header block

564

446

# If _dirblock_state was in memory, we should just return info from

565

447

# there, this function is only meant to handle when we want to read

566

448

# part of the disk.

567

if self._dirblock_state != DirState.NOT_IN_MEMORY:

568

raise AssertionError("bad dirblock state %r" % self._dirblock_state)

449

assert self._dirblock_state == DirState.NOT_IN_MEMORY

569

450

570

451

# The disk representation is generally info + '\0\n\0' at the end. But

571

452

# for bisecting, it is easier to treat this as '\0' + info + '\0\n'

583

464

found = {}

584

465

585

466

# Avoid infinite seeking

586

max_count = 30*len(paths)

467

max_count = 30*len(dir_name_list)

587

468

count = 0

588

469

# pending is a list of places to look.

589

470

# each entry is a tuple of low, high, dir_names

591

472

# high -> the last byte offset (inclusive)

592

473

# dir_names -> The list of (dir, name) pairs that should be found in

593

474

# the [low, high] range

594

pending = [(low, high, paths)]

475

pending = [(low, high, dir_name_list)]

595

476

596

477

page_size = self._bisect_page_size

597

478

650

531

# Find what entries we are looking for, which occur before and

651

532

# after this first record.

652

533

after = start

653

if first_fields[1]:

654

first_path = first_fields[1] + '/' + first_fields[2]

655

else:

656

first_path = first_fields[2]

657

first_loc = _bisect_path_left(cur_files, first_path)

534

first_dir_name = (first_fields[1], first_fields[2])

535

first_loc = bisect.bisect_left(cur_files, first_dir_name)

658

536

659

537

# These exist before the current location

660

538

pre = cur_files[:first_loc]

677

555

else:

678

556

after = mid + len(block)

679

557

680

if last_fields[1]:

681

last_path = last_fields[1] + '/' + last_fields[2]

682

else:

683

last_path = last_fields[2]

684

last_loc = _bisect_path_right(post, last_path)

558

last_dir_name = (last_fields[1], last_fields[2])

559

last_loc = bisect.bisect_right(post, last_dir_name)

685

560

686

561

middle_files = post[:last_loc]

687

562

post = post[last_loc:]

692

567

# Either we will find them here, or we can mark them as

693

568

# missing.

694

569

695

if middle_files[0] == first_path:

570

if middle_files[0] == first_dir_name:

696

571

# We might need to go before this location

697

pre.append(first_path)

698

if middle_files[-1] == last_path:

699

post.insert(0, last_path)

572

pre.append(first_dir_name)

573

if middle_files[-1] == last_dir_name:

574

post.insert(0, last_dir_name)

700

575

701

576

# Find out what paths we have

702

paths = {first_path:[first_fields]}

703

# last_path might == first_path so we need to be

577

paths = {first_dir_name:[first_fields]}

578

# last_dir_name might == first_dir_name so we need to be

704

579

# careful if we should append rather than overwrite

705

580

if last_entry_num != first_entry_num:

706

paths.setdefault(last_path, []).append(last_fields)

581

paths.setdefault(last_dir_name, []).append(last_fields)

707

582

for num in xrange(first_entry_num+1, last_entry_num):

708

583

# TODO: jam 20070223 We are already splitting here, so

709

584

# shouldn't we just split the whole thing rather

710

585

# than doing the split again in add_one_record?

711

586

fields = entries[num].split('\0')

712

if fields[1]:

713

path = fields[1] + '/' + fields[2]

714

else:

715

path = fields[2]

716

paths.setdefault(path, []).append(fields)

587

dir_name = (fields[1], fields[2])

588

paths.setdefault(dir_name, []).append(fields)

717

589

718

for path in middle_files:

719

for fields in paths.get(path, []):

590

for dir_name in middle_files:

591

for fields in paths.get(dir_name, []):

720

592

# offset by 1 because of the opening '\0'

721

593

# consider changing fields_to_entry to avoid the

722

594

# extra list slice

723

595

entry = fields_to_entry(fields[1:])

724

found.setdefault(path, []).append(entry)

596

found.setdefault(dir_name, []).append(entry)

725

597

726

598

# Now we have split up everything into pre, middle, and post, and

727

599

# we have handled everything that fell in 'middle'.

744

616

_bisect_dirblocks is meant to find the contents of directories, which

745

617

differs from _bisect, which only finds individual entries.

746

618

747

:param dir_list: A sorted list of directory names ['', 'dir', 'foo'].

619

:param dir_list: An sorted list of directory names ['', 'dir', 'foo'].

748

620

:return: A map from dir => entries_for_dir

749

621

"""

750

622

# TODO: jam 20070223 A lot of the bisecting logic could be shared

757

629

# If _dirblock_state was in memory, we should just return info from

758

630

# there, this function is only meant to handle when we want to read

759

631

# part of the disk.

760

if self._dirblock_state != DirState.NOT_IN_MEMORY:

761

raise AssertionError("bad dirblock state %r" % self._dirblock_state)

632

assert self._dirblock_state == DirState.NOT_IN_MEMORY

633

762

634

# The disk representation is generally info + '\0\n\0' at the end. But

763

635

# for bisecting, it is easier to treat this as '\0' + info + '\0\n'

764

636

# Because it means we can sync on the '\n'

917

789

918

790

return found

919

791

920

def _bisect_recursive(self, paths):

792

def _bisect_recursive(self, dir_name_list):

921

793

"""Bisect for entries for all paths and their children.

922

794

923

795

This will use bisect to find all records for the supplied paths. It

936

808

# Directories that have been read

937

809

processed_dirs = set()

938

810

# Get the ball rolling with the first bisect for all entries.

939

newly_found = self._bisect(paths)

811

newly_found = self._bisect(dir_name_list)

940

812

941

813

while newly_found:

942

814

# Directories that need to be read

966

838

if dir_name[0] in pending_dirs:

967

839

# This entry will be found in the dir search

968

840

continue

841

# TODO: We need to check if this entry has

842

# already been found. Otherwise we might be

843

# hitting infinite recursion.

969

844

if dir_name not in found_dir_names:

970

paths_to_search.add(tree_info[1])

845

paths_to_search.add(dir_name)

971

846

# Now we have a list of paths to look for directly, and

972

847

# directory blocks that need to be read.

973

848

# newly_found is mixing the keys between (dir, name) and path

978

853

processed_dirs.update(pending_dirs)

979

854

return found

980

855

981

def _discard_merge_parents(self):

982

"""Discard any parent trees beyond the first.

983

984

Note that if this fails the dirstate is corrupted.

985

986

After this function returns the dirstate contains 2 trees, neither of

987

which is ghosted.

988

"""

989

self._read_header_if_needed()

990

parents = self.get_parent_ids()

991

if len(parents) < 1:

992

return

993

# only require all dirblocks if we are doing a full-pass removal.

994

self._read_dirblocks_if_needed()

995

dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])

996

def iter_entries_removable():

997

for block in self._dirblocks:

998

deleted_positions = []

999

for pos, entry in enumerate(block[1]):

1000

yield entry

1001

if (entry[1][0][0], entry[1][1][0]) in dead_patterns:

1002

deleted_positions.append(pos)

1003

if deleted_positions:

1004

if len(deleted_positions) == len(block[1]):

1005

del block[1][:]

1006

else:

1007

for pos in reversed(deleted_positions):

1008

del block[1][pos]

1009

# if the first parent is a ghost:

1010

if parents[0] in self.get_ghosts():

1011

empty_parent = [DirState.NULL_PARENT_DETAILS]

1012

for entry in iter_entries_removable():

1013

entry[1][1:] = empty_parent

1014

else:

1015

for entry in iter_entries_removable():

1016

del entry[1][2:]

1017

1018

self._ghosts = []

1019

self._parents = [parents[0]]

1020

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1021

self._header_state = DirState.IN_MEMORY_MODIFIED

1022

1023

856

def _empty_parent_info(self):

1024

857

return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -

1025

858

len(self._ghosts))

1051

884

# the basename of the directory must be the end of its full name.

1052

885

if not (parent_block_index == -1 and

1053

886

parent_block_index == -1 and dirname == ''):

1054

if not dirname.endswith(

1055

self._dirblocks[parent_block_index][1][parent_row_index][0][1]):

1056

raise AssertionError("bad dirname %r" % dirname)

887

assert dirname.endswith(

888

self._dirblocks[parent_block_index][1][parent_row_index][0][1])

1057

889

block_index, present = self._find_block_index_from_key((dirname, '', ''))

1058

890

if not present:

1059

## In future, when doing partial parsing, this should load and

891

## In future, when doing partial parsing, this should load and

1060

892

# populate the entire block.

1061

893

self._dirblocks.insert(block_index, (dirname, []))

1062

894

return block_index

1071

903

to prevent unneeded overhead when callers have a sorted list already.

1072

904

:return: Nothing.

1073

905

"""

1074

if new_entries[0][0][0:2] != ('', ''):

1075

raise AssertionError(

1076

"Missing root row %r" % (new_entries[0][0],))

1077

# The two blocks here are deliberate: the root block and the

906

assert new_entries[0][0][0:2] == ('', ''), \

907

"Missing root row %r" % (new_entries[0][0],)

908

# The two blocks here are deliberate: the root block and the

1078

909

# contents-of-root block.

1079

910

self._dirblocks = [('', []), ('', [])]

1080

911

current_block = self._dirblocks[0][1]

1101

932

# The above loop leaves the "root block" entries mixed with the

1102

933

# "contents-of-root block". But we don't want an if check on

1103

934

# all entries, so instead we just fix it up here.

1104

if self._dirblocks[1] != ('', []):

1105

raise ValueError("bad dirblock start %r" % (self._dirblocks[1],))

935

assert self._dirblocks[1] == ('', [])

1106

936

root_block = []

1107

937

contents_of_root_block = []

1108

938

for entry in self._dirblocks[0][1]:

1113

943

self._dirblocks[0] = ('', root_block)

1114

944

self._dirblocks[1] = ('', contents_of_root_block)

1115

945

1116

def _entries_for_path(self, path):

1117

"""Return a list with all the entries that match path for all ids."""

1118

dirname, basename = os.path.split(path)

1119

key = (dirname, basename, '')

1120

block_index, present = self._find_block_index_from_key(key)

1121

if not present:

1122

# the block which should contain path is absent.

1123

return []

1124

result = []

1125

block = self._dirblocks[block_index][1]

1126

entry_index, _ = self._find_entry_index(key, block)

1127

# we may need to look at multiple entries at this path: walk while the specific_files match.

1128

while (entry_index < len(block) and

1129

block[entry_index][0][0:2] == key[0:2]):

1130

result.append(block[entry_index])

1131

entry_index += 1

1132

return result

1133

1134

946

def _entry_to_line(self, entry):

1135

947

"""Serialize entry to a NULL delimited line ready for _get_output_lines.

1136

948

1192

1004

"""

1193

1005

if key[0:2] == ('', ''):

1194

1006

return 0, True

1195

try:

1196

if (self._last_block_index is not None and

1197

self._dirblocks[self._last_block_index][0] == key[0]):

1198

return self._last_block_index, True

1199

except IndexError:

1200

pass

1201

1007

block_index = bisect_dirblock(self._dirblocks, key[0], 1,

1202

1008

cache=self._split_path_cache)

1203

1009

# _right returns one-past-where-key is so we have to subtract

1204

1010

# one to use it. we use _right here because there are two

1205

1011

# '' blocks - the root, and the contents of root

1206

1012

# we always have a minimum of 2 in self._dirblocks: root and

1207

# root-contents, and for '', we get 2 back, so this is

1013

# root-contents, and for '', we get 2 back, so this is

1208

1014

# simple and correct:

1209

1015

present = (block_index < len(self._dirblocks) and

1210

1016

self._dirblocks[block_index][0] == key[0])

1211

self._last_block_index = block_index

1212

# Reset the entry index cache to the beginning of the block.

1213

self._last_entry_index = -1

1214

1017

return block_index, present

1215

1018

1216

1019

def _find_entry_index(self, key, block):

1218

1021

1219

1022

:return: The entry index, True if the entry for the key is present.

1220

1023

"""

1221

len_block = len(block)

1222

try:

1223

if self._last_entry_index is not None:

1224

# mini-bisect here.

1225

entry_index = self._last_entry_index + 1

1226

# A hit is when the key is after the last slot, and before or

1227

# equal to the next slot.

1228

if ((entry_index > 0 and block[entry_index - 1][0] < key) and

1229

key <= block[entry_index][0]):

1230

self._last_entry_index = entry_index

1231

present = (block[entry_index][0] == key)

1232

return entry_index, present

1233

except IndexError:

1234

pass

1235

1024

entry_index = bisect.bisect_left(block, (key, []))

1236

present = (entry_index < len_block and

1025

present = (entry_index < len(block) and

1237

1026

block[entry_index][0] == key)

1238

self._last_entry_index = entry_index

1239

1027

return entry_index, present

1240

1028

1241

1029

@staticmethod

1242

def from_tree(tree, dir_state_filename, sha1_provider=None):

1030

def from_tree(tree, dir_state_filename):

1243

1031

"""Create a dirstate from a bzr Tree.

1244

1032

1245

1033

:param tree: The tree which should provide parent information and

1246

1034

inventory ids.

1247

:param sha1_provider: an object meeting the SHA1Provider interface.

1248

If None, a DefaultSHA1Provider is used.

1249

1035

:return: a DirState object which is currently locked for writing.

1250

1036

(it was locked by DirState.initialize)

1251

1037

"""

1252

result = DirState.initialize(dir_state_filename,

1253

sha1_provider=sha1_provider)

1038

result = DirState.initialize(dir_state_filename)

1254

1039

try:

1255

1040

tree.lock_read()

1256

1041

try:

1274

1059

raise

1275

1060

return result

1276

1061

1277

def update_by_delta(self, delta):
    """Apply an inventory delta to the dirstate for tree 0

    The delta is first reduced to two maps keyed by file id: paths to
    remove and entries to insert. A move of a directory is expanded so
    that every child still present in tree 0 is removed from beneath the
    old path and re-inserted beneath the new one. All removals are then
    applied before any insertions.

    :param delta: An inventory delta. See Inventory.apply_delta for
        details. Each item is (old_path, new_path, file_id, inv_entry),
        with unicode paths that are encoded to utf8 here.
    :raises AssertionError: If a file id occurs more than once in delta.
    """
    self._read_dirblocks_if_needed()
    insertions = {}
    removals = {}
    for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
        if (file_id in insertions) or (file_id in removals):
            raise AssertionError("repeated file id in delta %r" % (file_id,))
        if old_path is not None:
            old_path = old_path.encode('utf-8')
            removals[file_id] = old_path
        if new_path is not None:
            new_path = new_path.encode('utf-8')
            dirname, basename = osutils.split(new_path)
            key = (dirname, basename, file_id)
            minikind = DirState._kind_to_minikind[inv_entry.kind]
            if minikind == 't':
                fingerprint = inv_entry.reference_revision
            else:
                fingerprint = ''
            insertions[file_id] = (key, minikind, inv_entry.executable,
                                   fingerprint, new_path)
        # Transform moves into delete+add pairs
        if None not in (old_path, new_path):
            for child in self._iter_child_entries(0, old_path):
                child_id = child[0][2]
                if child_id in insertions or child_id in removals:
                    # The delta already says what happens to this child.
                    continue
                child_dirname = child[0][0]
                child_basename = child[0][1]
                minikind = child[1][0][0]
                # NOTE(review): elsewhere in this file the tree-0 details
                # tuple is unpacked as (minikind, link_or_sha1, size,
                # executable, packed_stat), which would make index 4 the
                # packed stat rather than the fingerprint - confirm what
                # update_minimal expects here.
                fingerprint = child[1][0][4]
                executable = child[1][0][3]
                old_child_path = osutils.pathjoin(child_dirname,
                                                  child_basename)
                removals[child_id] = old_child_path
                child_suffix = child_dirname[len(old_path):]
                new_child_dirname = (new_path + child_suffix)
                key = (new_child_dirname, child_basename, child_id)
                # Join with osutils.pathjoin, exactly as for the old child
                # path above: os.path.join would use the platform
                # separator, producing a path that does not match the
                # '/'-separated key it is stored with.
                new_child_path = osutils.pathjoin(new_child_dirname,
                                                  child_basename)
                insertions[child_id] = (key, minikind, executable,
                                        fingerprint, new_child_path)
    self._apply_removals(removals.values())
    self._apply_insertions(insertions.values())

1325

1326

def _apply_removals(self, removals):
    """Make each of the given paths absent in tree 0.

    Paths are processed in reverse sorted order. After each path has been
    made absent, the directory block of the same name (if there is one)
    is checked for entries that are still live in tree 0.

    :param removals: An iterable of utf8 paths to remove.
    :raises errors.InconsistentDelta: If a path is not present in tree 0,
        or if a removed directory still has children that are neither
        absent nor relocated. self._changes_aborted is set before
        raising, as the other delta-application methods in this class do.
    """
    for path in sorted(removals, reverse=True):
        dirname, basename = osutils.split(path)
        block_i, entry_i, d_present, f_present = \
            self._get_block_entry_index(dirname, basename, 0)
        if not f_present:
            # Without this check entry_i is only an insertion point: it
            # may index a different entry (which would then be wrongly
            # made absent) or lie past the end of the block.
            self._changes_aborted = True
            raise errors.InconsistentDelta(path, None,
                "The path to be removed is not present in the tree.")
        entry = self._dirblocks[block_i][1][entry_i]
        self._make_absent(entry)
        # See if we have a malformed delta: deleting a directory must not
        # leave crud behind. This increases the number of bisects needed
        # substantially, but deletion or renames of large numbers of paths
        # is rare enough it shouldn't be an issue (famous last words?) RBC
        # 20080730.
        block_i, entry_i, d_present, f_present = \
            self._get_block_entry_index(path, '', 0)
        if d_present:
            # The dir block is still present in the dirstate; this could
            # be due to it being in a parent tree, or a corrupt delta.
            for child_entry in self._dirblocks[block_i][1]:
                if child_entry[1][0][0] not in ('r', 'a'):
                    self._changes_aborted = True
                    raise errors.InconsistentDelta(path, entry[0][2],
                        "The file id was deleted but its children were "
                        "not deleted.")

1348

1349

def _apply_insertions(self, adds):

1350

for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):

1351

self.update_minimal(key, minikind, executable, fingerprint,

1352

path_utf8=path_utf8)

1353

1354

def update_basis_by_delta(self, delta, new_revid):
    """Update the parents of this tree after a commit.

    This gives the tree one parent, with revision id new_revid. The
    inventory delta is applied to the current basis tree to generate the
    inventory for the parent new_revid, and all other parent trees are
    discarded.

    Note that an exception during the operation of this method will leave
    the dirstate in a corrupt state where it should not be saved.

    Finally, we expect all changes to be synchronising the basis tree with
    the working tree.

    :param new_revid: The new revision id for the trees parent.
    :param delta: An inventory delta (see apply_inventory_delta) describing
        the changes from the current left most parent revision to new_revid.
    :raises NotImplementedError: If the dirstate has ghost parents.
    :raises AssertionError: If a non-root path is renamed to the root
        while the old path is empty.
    """
    self._read_dirblocks_if_needed()
    self._discard_merge_parents()
    if self._ghosts != []:
        raise NotImplementedError(self.update_basis_by_delta)
    if len(self._parents) == 0:
        # setup a blank tree, the most simple way.
        empty_parent = DirState.NULL_PARENT_DETAILS
        for entry in self._iter_entries():
            entry[1].append(empty_parent)
        self._parents.append(new_revid)

    self._parents[0] = new_revid

    delta = sorted(delta, reverse=True)
    adds = []
    changes = []
    deletes = []
    # The paths this function accepts are unicode and must be encoded as we
    # go.
    encode = cache_utf8.encode
    inv_to_entry = self._inv_entry_to_details
    # delta is now (deletes, changes), (adds) in reverse lexicographical
    # order.
    # deletes in reverse lexicographic order are safe to process in situ.
    # renames are not, as a rename from any path could go to a path
    # lexicographically lower, so we transform renames into delete, add
    # pairs, expanding them recursively as needed.
    # At the same time, to reduce interface friction we convert the input
    # inventory entries to dirstate.
    root_only = ('', '')
    for old_path, new_path, file_id, inv_entry in delta:
        if old_path is None:
            adds.append((None, encode(new_path), file_id,
                inv_to_entry(inv_entry), True))
        elif new_path is None:
            deletes.append((encode(old_path), None, file_id, None, True))
        elif (old_path, new_path) != root_only:
            # Renames:
            # Because renames must preserve their children we must have
            # processed all relocations and removes before hand. The sort
            # order ensures we've examined the child paths, but we also
            # have to execute the removals, or the split to an add/delete
            # pair will result in the deleted item being reinserted, or
            # renamed items being reinserted twice - and possibly at the
            # wrong place. Splitting into a delete/add pair also simplifies
            # the handling of entries with ('f', ...), ('r' ...) because
            # the target of the 'r' is old_path here, and we add that to
            # deletes, meaning that the add handler does not need to check
            # for 'r' items on every pass.
            self._update_basis_apply_deletes(deletes)
            deletes = []
            # Encode both paths once. The child paths handled below are
            # utf8 byte strings, so every length used to slice them must
            # be the encoded length: the unicode length differs as soon as
            # old_path contains a non-ASCII character.
            old_path_utf8 = encode(old_path)
            new_path_utf8 = encode(new_path)
            # Split into an add/delete pair recursively.
            adds.append((None, new_path_utf8, file_id,
                inv_to_entry(inv_entry), False))
            # Expunge deletes that we've seen so that deleted/renamed
            # children of a rename directory are handled correctly.
            new_deletes = reversed(list(self._iter_child_entries(1,
                old_path_utf8)))
            # Remove the current contents of the tree at orig_path, and
            # reinsert at the correct new path.
            for entry in new_deletes:
                if entry[0][0]:
                    source_path = entry[0][0] + '/' + entry[0][1]
                else:
                    source_path = entry[0][1]
                if new_path_utf8:
                    target_path = (new_path_utf8 +
                        source_path[len(old_path_utf8):])
                else:
                    if old_path_utf8 == '':
                        raise AssertionError("cannot rename directory to"
                            " itself")
                    # Moving into the root: also drop the '/' that
                    # followed the old directory name.
                    target_path = source_path[len(old_path_utf8) + 1:]
                adds.append((None, target_path, entry[0][2], entry[1][1], False))
                deletes.append(
                    (source_path, target_path, entry[0][2], None, False))
            # new_path is passed through unencoded, as before.
            deletes.append(
                (old_path_utf8, new_path, file_id, None, False))
        else:
            # changes to just the root should not require remove/insertion
            # of everything.
            changes.append((encode(old_path), encode(new_path), file_id,
                inv_to_entry(inv_entry)))

    # Finish expunging deletes/first half of renames.
    self._update_basis_apply_deletes(deletes)
    # Reinstate second half of renames and new paths.
    self._update_basis_apply_adds(adds)
    # Apply in-situ changes.
    self._update_basis_apply_changes(changes)

    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
    self._header_state = DirState.IN_MEMORY_MODIFIED
    # The delta may have moved or removed ids, so drop the cached index.
    self._id_index = None
    return

1467

1468

def _update_basis_apply_adds(self, adds):

1469

"""Apply a sequence of adds to tree 1 during update_basis_by_delta.

1470

1471

They may be adds, or renames that have been split into add/delete

1472

pairs.

1473

1474

:param adds: A sequence of adds. Each add is a tuple:

1475

(None, new_path_utf8, file_id, (entry_details), real_add). real_add

1476

is False when the add is the second half of a remove-and-reinsert

1477

pair created to handle renames and deletes.

1478

"""

1479

# Adds are accumulated partly from renames, so can be in any input

1480

# order - sort it.

1481

adds.sort()

1482

# adds is now in lexographic order, which places all parents before

1483

# their children, so we can process it linearly.

1484

absent = 'ar'

1485

for old_path, new_path, file_id, new_details, real_add in adds:

1486

# the entry for this file_id must be in tree 0.

1487

entry = self._get_entry(0, file_id, new_path)

1488

if entry[0] is None or entry[0][2] != file_id:

1489

self._changes_aborted = True

1490

raise errors.InconsistentDelta(new_path, file_id,

1491

'working tree does not contain new entry')

1492

if real_add and entry[1][1][0] not in absent:

1493

self._changes_aborted = True

1494

raise errors.InconsistentDelta(new_path, file_id,

1495

'The entry was considered to be a genuinely new record,'

1496

' but there was already an old record for it.')

1497

# We don't need to update the target of an 'r' because the handling

1498

# of renames turns all 'r' situations into a delete at the original

1499

# location.

1500

entry[1][1] = new_details

1501

1502

def _update_basis_apply_changes(self, changes):

1503

"""Apply a sequence of changes to tree 1 during update_basis_by_delta.

1504

1505

:param adds: A sequence of changes. Each change is a tuple:

1506

(path_utf8, path_utf8, file_id, (entry_details))

1507

"""

1508

absent = 'ar'

1509

for old_path, new_path, file_id, new_details in changes:

1510

# the entry for this file_id must be in tree 0.

1511

entry = self._get_entry(0, file_id, new_path)

1512

if entry[0] is None or entry[0][2] != file_id:

1513

self._changes_aborted = True

1514

raise errors.InconsistentDelta(new_path, file_id,

1515

'working tree does not contain new entry')

1516

if (entry[1][0][0] in absent or

1517

entry[1][1][0] in absent):

1518

self._changes_aborted = True

1519

raise errors.InconsistentDelta(new_path, file_id,

1520

'changed considered absent')

1521

entry[1][1] = new_details

1522

1523

def _update_basis_apply_deletes(self, deletes):
    """Remove a batch of paths from tree 1.

    Used by update_basis_by_delta. The items may be genuine deletes, or
    the "delete" half of a rename that was split into an add/delete pair.

    :param deletes: A sequence of deletes. Each delete is a tuple:
        (old_path_utf8, new_path_utf8, file_id, None, real_delete).
        real_delete is True when the desired outcome is an actual deletion
        rather than the rename handling logic temporarily deleting a path
        during the replacement of a parent.
    :raises AssertionError: If real_delete disagrees with whether
        new_path is None.
    :raises errors.InconsistentDelta: If the path is not present in
        tree 1, if its file id differs, or if its tree 0 state contradicts
        the kind of delete requested.
    """
    null = DirState.NULL_PARENT_DETAILS
    for old_path, new_path, file_id, _, real_delete in deletes:
        if real_delete != (new_path is None):
            raise AssertionError("bad delete delta")
        # the entry for this file_id must be in tree 1.
        dirname, basename = osutils.split(old_path)
        block_index, entry_index, dir_present, file_present = \
            self._get_block_entry_index(dirname, basename, 1)
        if not file_present:
            self._changes_aborted = True
            raise errors.InconsistentDelta(old_path, file_id,
                'basis tree does not contain removed entry')
        block_entries = self._dirblocks[block_index][1]
        entry = block_entries[entry_index]
        if entry[0][2] != file_id:
            self._changes_aborted = True
            raise errors.InconsistentDelta(old_path, file_id,
                'mismatched file_id in tree 1')
        current_minikind = entry[1][0][0]
        if real_delete:
            if current_minikind != 'a':
                self._changes_aborted = True
                raise errors.InconsistentDelta(old_path, file_id,
                    'This was marked as a real delete, but the WT state'
                    ' claims that it still exists and is versioned.')
            del block_entries[entry_index]
            continue
        # Not a real delete: the first half of a rename.
        if current_minikind == 'a':
            # We were asked to rename an entry that tree 0 marks as
            # absent.
            self._changes_aborted = True
            raise errors.InconsistentDelta(old_path, file_id,
                'The entry was considered a rename, but the source path'
                ' is marked as absent.')
        if current_minikind == 'r':
            # implement the rename
            del block_entries[entry_index]
        else:
            # it is being resurrected here, so blank it out temporarily.
            block_entries[entry_index][1][1] = null

1575

1576

def _observed_sha1(self, entry, sha1, stat_value,

1577

_stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):

1578

"""Note the sha1 of a file.

1579

1580

:param entry: The entry the sha1 is for.

1581

:param sha1: The observed sha1.

1582

:param stat_value: The os.lstat for the file.

1583

"""

1062

def update_entry(self, entry, abspath, stat_value=None):

1063

"""Update the entry based on what is actually on disk.

1064

1065

:param entry: This is the dirblock entry for the file in question.

1066

:param abspath: The path on disk for this file.

1067

:param stat_value: (optional) if we already have done a stat on the

1068

file, re-use it.

1069

:return: The sha1 hexdigest of the file (40 bytes) or link target of a

1070

symlink.

1071

"""

1072

# This code assumes that the entry passed in is directly held in one of

1073

# the internal _dirblocks. So the dirblock state must have already been

1074

# read.

1075

assert self._dirblock_state != DirState.NOT_IN_MEMORY

1076

if stat_value is None:

1077

try:

1078

# We could inline os.lstat but the common case is that

1079

# stat_value will be passed in, not read here.

1080

stat_value = self._lstat(abspath, entry)

1081

except (OSError, IOError), e:

1082

if e.errno in (errno.ENOENT, errno.EACCES,

1083

errno.EPERM):

1084

# The entry is missing, consider it gone

1085

return None

1086

raise

1087

1088

kind = osutils.file_kind_from_stat_mode(stat_value.st_mode)

1584

1089

try:

1585

minikind = _stat_to_minikind[stat_value.st_mode & 0170000]

1586

except KeyError:

1587

# Unhandled kind

1090

minikind = DirState._kind_to_minikind[kind]

1091

except KeyError: # Unknown kind

1588

1092

return None

1589

packed_stat = _pack_stat(stat_value)

1590

if minikind == 'f':

1093

packed_stat = pack_stat(stat_value)

1094

(saved_minikind, saved_link_or_sha1, saved_file_size,

1095

saved_executable, saved_packed_stat) = entry[1][0]

1096

1097

if (minikind == saved_minikind

1098

and packed_stat == saved_packed_stat

1099

# size should also be in packed_stat

1100

and saved_file_size == stat_value.st_size):

1101

# The stat hasn't changed since we saved, so we can potentially

1102

# re-use the saved sha hash.

1103

if minikind == 'd':

1104

return None

1105

1591

1106

if self._cutoff_time is None:

1592

1107

self._sha_cutoff_time()

1108

1593

1109

if (stat_value.st_mtime < self._cutoff_time

1594

1110

and stat_value.st_ctime < self._cutoff_time):

1595

entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],

1596

packed_stat)

1597

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1111

# Return the existing fingerprint

1112

return saved_link_or_sha1

1113

1114

# If we have gotten this far, that means that we need to actually

1115

# process this entry.

1116

link_or_sha1 = None

1117

if minikind == 'f':

1118

link_or_sha1 = self._sha1_file(abspath, entry)

1119

executable = self._is_executable(stat_value.st_mode,

1120

saved_executable)

1121

entry[1][0] = ('f', link_or_sha1, stat_value.st_size,

1122

executable, packed_stat)

1123

elif minikind == 'd':

1124

link_or_sha1 = None

1125

entry[1][0] = ('d', '', 0, False, packed_stat)

1126

if saved_minikind != 'd':

1127

# This changed from something into a directory. Make sure we

1128

# have a directory block for it. This doesn't happen very

1129

# often, so this doesn't have to be super fast.

1130

block_index, entry_index, dir_present, file_present = \

1131

self._get_block_entry_index(entry[0][0], entry[0][1], 0)

1132

self._ensure_block(block_index, entry_index,

1133

osutils.pathjoin(entry[0][0], entry[0][1]))

1134

elif minikind == 'l':

1135

link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)

1136

entry[1][0] = ('l', link_or_sha1, stat_value.st_size,

1137

False, packed_stat)

1138

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

1139

return link_or_sha1

1598

1140

1599

1141

def _sha_cutoff_time(self):

1600

1142

"""Return cutoff time.

1613

1155

"""Return the os.lstat value for this path."""

1614

1156

return os.lstat(abspath)

1615

1157

1616

def _sha1_file_and_mutter(self, abspath):
    """Log that abspath is being hashed, then return its sha1.

    :param abspath: The path handed to the sha1 provider.
    :return: Whatever self._sha1_provider.sha1(abspath) returns.
    """
    # when -Dhashcache is turned on, this is monkey-patched in to log
    # file reads
    trace.mutter("dirstate sha1 " + abspath)
    digest = self._sha1_provider.sha1(abspath)
    return digest

1158

def _sha1_file(self, abspath, entry):
    """Calculate the SHA1 of a file by reading the full text

    :param abspath: The path of the file to open and hash.
    :param entry: Not used by this implementation.
    :return: The result of osutils.sha_file on the opened file.
    """
    stream = file(abspath, 'rb', buffering=65000)
    try:
        digest = osutils.sha_file(stream)
    finally:
        # Close the file whether or not hashing succeeded.
        stream.close()
    return digest

1621

1165

1622

1166

def _is_executable(self, mode, old_executable):

1623

1167

"""Is this file executable?"""

1636

1180

# already in memory. However, this really needs to be done at a

1637

1181

# higher level, because there either won't be anything on disk,

1638

1182

# or the thing on disk will be a file.

1639

fs_encoding = osutils._fs_enc

1640

if isinstance(abspath, unicode):

1641

# abspath is defined as the path to pass to lstat. readlink is

1642

# buggy in python < 2.6 (it doesn't encode unicode path into FS

1643

# encoding), so we need to encode ourselves knowing that unicode

1644

# paths are produced by UnicodeDirReader on purpose.

1645

abspath = abspath.encode(fs_encoding)

1646

target = os.readlink(abspath)

1647

if fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):

1648

# Change encoding if needed

1649

target = target.decode(fs_encoding).encode('UTF-8')

1650

return target

1183

return os.readlink(abspath)

1651

1184

1652

1185

def get_ghosts(self):

1653

1186

"""Return a list of the parent tree revision ids that are ghosts."""

1771

1304

be attempted.

1772

1305

:return: A tuple describing where the path is located, or should be

1773

1306

inserted. The tuple contains four fields: the block index, the row

1774

index, the directory is present (boolean), the entire path is

1775

present (boolean). There is no guarantee that either

1307

index, anda two booleans are True when the directory is present, and

1308

when the entire path is present. There is no guarantee that either

1776

1309

coordinate is currently reachable unless the found field for it is

1777

1310

True. For instance, a directory not present in the searched tree

1778

1311

may be returned with a value one greater than the current highest

1790

1323

return block_index, 0, False, False

1791

1324

block = self._dirblocks[block_index][1] # access the entries only

1792

1325

entry_index, present = self._find_entry_index(key, block)

1793

# linear search through entries at this path to find the one

1326

# linear search through present entries at this path to find the one

1794

1327

# requested.

1795

1328

while entry_index < len(block) and block[entry_index][0][1] == basename:

1796

if block[entry_index][1][tree_index][0] not in 'ar':

1797

# neither absent or relocated

1329

if block[entry_index][1][tree_index][0] not in \

1330

('a', 'r'): # absent, relocated

1798

1331

return block_index, entry_index, True, True

1799

1332

entry_index += 1

1800

1333

return block_index, entry_index, True, False

1801

1334

1802

def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None, include_deleted=False):

1803

"""Get the dirstate entry for path in tree tree_index.

1335

def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None):

1336

"""Get the dirstate entry for path in tree tree_index

1804

1337

1805

1338

If either file_id or path is supplied, it is used as the key to lookup.

1806

1339

If both are supplied, the fastest lookup is used, and an error is

1813

1346

trees.

1814

1347

:param fileid_utf8: A utf8 file_id to look up.

1815

1348

:param path_utf8: An utf8 path to be looked up.

1816

:param include_deleted: If True, and performing a lookup via

1817

fileid_utf8 rather than path_utf8, return an entry for deleted

1818

(absent) paths.

1819

1349

:return: The dirstate entry tuple for path, or (None, None)

1820

1350

"""

1821

1351

self._read_dirblocks_if_needed()

1822

1352

if path_utf8 is not None:

1823

if type(path_utf8) is not str:

1824

raise AssertionError('path_utf8 is not a str: %s %s'

1825

% (type(path_utf8), path_utf8))

1353

assert path_utf8.__class__ == str, 'path_utf8 is not a str: %s %s' % (type(path_utf8), path_utf8)

1826

1354

# path lookups are faster

1827

1355

dirname, basename = osutils.split(path_utf8)

1828

1356

block_index, entry_index, dir_present, file_present = \

1830

1358

if not file_present:

1831

1359

return None, None

1832

1360

entry = self._dirblocks[block_index][1][entry_index]

1833

if not (entry[0][2] and entry[1][tree_index][0] not in ('a', 'r')):

1834

raise AssertionError('unversioned entry?')

1361

assert entry[0][2] and entry[1][tree_index][0] not in ('a', 'r'), 'unversioned entry?!?!'

1835

1362

if fileid_utf8:

1836

1363

if entry[0][2] != fileid_utf8:

1837

self._changes_aborted = True

1838

1364

raise errors.BzrError('integrity error ? : mismatching'

1839

1365

' tree_index, file_id and path')

1840

1366

return entry

1841

1367

else:

1368

assert fileid_utf8 is not None

1842

1369

possible_keys = self._get_id_index().get(fileid_utf8, None)

1843

1370

if not possible_keys:

1844

1371

return None, None

1851

1378

continue

1852

1379

# WARNING: DO not change this code to use _get_block_entry_index

1853

1380

# as that function is not suitable: it does not use the key

1854

# to lookup, and thus the wrong coordinates are returned.

1381

# to lookup, and thus the wront coordinates are returned.

1855

1382

block = self._dirblocks[block_index][1]

1856

1383

entry_index, present = self._find_entry_index(key, block)

1857

1384

if present:

1858

1385

entry = self._dirblocks[block_index][1][entry_index]

1859

1386

if entry[1][tree_index][0] in 'fdlt':

1860

# this is the result we are looking for: the

1387

# this is the result we are looking for: the

1861

1388

# real home of this file_id in this tree.

1862

1389

return entry

1863

1390

if entry[1][tree_index][0] == 'a':

1864

1391

# there is no home for this entry in this tree

1865

if include_deleted:

1866

return entry

1867

1392

return None, None

1868

if entry[1][tree_index][0] != 'r':

1869

raise AssertionError(

1870

"entry %r has invalid minikind %r for tree %r" \

1871

% (entry,

1872

entry[1][tree_index][0],

1873

tree_index))

1393

assert entry[1][tree_index][0] == 'r', \

1394

"entry %r has invalid minikind %r for tree %r" \

1395

% (entry,

1396

entry[1][tree_index][0],

1397

tree_index)

1874

1398

real_path = entry[1][tree_index][1]

1875

1399

return self._get_entry(tree_index, fileid_utf8=fileid_utf8,

1876

1400

path_utf8=real_path)

1877

1401

return None, None

1878

1402

1879

1403

@classmethod

1880

def initialize(cls, path, sha1_provider=None):

1404

def initialize(cls, path):

1881

1405

"""Create a new dirstate on path.

1882

1406

1883

1407

The new dirstate will be an empty tree - that is it has no parents,

1884

1408

and only a root node - which has id ROOT_ID.

1885

1409

1410

The object will be write locked when returned to the caller,

1411

unless there was an exception in the writing, in which case it

1412

will be unlocked.

1413

1886

1414

:param path: The name of the file for the dirstate.

1887

:param sha1_provider: an object meeting the SHA1Provider interface.

1888

If None, a DefaultSHA1Provider is used.

1889

:return: A write-locked DirState object.

1415

:return: A DirState object.

1890

1416

"""

1891

1417

# This constructs a new DirState object on a path, sets the _state_file

1892

1418

# to a new empty file for that path. It then calls _set_data() with our

1893

1419

# stock empty dirstate information - a root with ROOT_ID, no children,

1894

1420

# and no parents. Finally it calls save() to ensure that this data will

1895

1421

# persist.

1896

if sha1_provider is None:

1897

sha1_provider = DefaultSHA1Provider()

1898

result = cls(path, sha1_provider)

1422

result = cls(path)

1899

1423

# root dir and root dir contents with no children.

1900

1424

empty_tree_dirblocks = [('', []), ('', [])]

1901

1425

# a new root directory, with a NULLSTAT.

1912

1436

raise

1913

1437

return result

1914

1438

1915

@staticmethod

1916

def _inv_entry_to_details(inv_entry):

1439

def _inv_entry_to_details(self, inv_entry):

1917

1440

"""Convert an inventory entry (from a revision tree) to state details.

1918

1441

1919

1442

:param inv_entry: An inventory entry whose sha1 and link targets can be

1924

1447

kind = inv_entry.kind

1925

1448

minikind = DirState._kind_to_minikind[kind]

1926

1449

tree_data = inv_entry.revision

1450

assert len(tree_data) > 0, 'empty revision for the inv_entry.'

1927

1451

if kind == 'directory':

1928

1452

fingerprint = ''

1929

1453

size = 0

1930

1454

executable = False

1931

1455

elif kind == 'symlink':

1932

if inv_entry.symlink_target is None:

1933

fingerprint = ''

1934

else:

1935

fingerprint = inv_entry.symlink_target.encode('utf8')

1456

fingerprint = inv_entry.symlink_target or ''

1936

1457

size = 0

1937

1458

executable = False

1938

1459

elif kind == 'file':

1947

1468

raise Exception("can't pack %s" % inv_entry)

1948

1469

return (minikind, fingerprint, size, executable, tree_data)

1949

1470

1950

def _iter_child_entries(self, tree_index, path_utf8):

1951

"""Iterate over all the entries that are children of path_utf.

1952

1953

This only returns entries that are present (not in 'a', 'r') in

1954

tree_index. tree_index data is not refreshed, so if tree 0 is used,

1955

results may differ from that obtained if paths were statted to

1956

determine what ones were directories.

1957

1958

Asking for the children of a non-directory will return an empty

1959

iterator.

1960

"""

1961

pending_dirs = []

1962

next_pending_dirs = [path_utf8]

1963

absent = 'ar'

1964

while next_pending_dirs:

1965

pending_dirs = next_pending_dirs

1966

next_pending_dirs = []

1967

for path in pending_dirs:

1968

block_index, present = self._find_block_index_from_key(

1969

(path, '', ''))

1970

if block_index == 0:

1971

block_index = 1

1972

if len(self._dirblocks) == 1:

1973

# asked for the children of the root with no other

1974

# contents.

1975

return

1976

if not present:

1977

# children of a non-directory asked for.

1978

continue

1979

block = self._dirblocks[block_index]

1980

for entry in block[1]:

1981

kind = entry[1][tree_index][0]

1982

if kind not in absent:

1983

yield entry

1984

if kind == 'd':

1985

if entry[0][0]:

1986

path = entry[0][0] + '/' + entry[0][1]

1987

else:

1988

path = entry[0][1]

1989

next_pending_dirs.append(path)

1990

1991

1471

def _iter_entries(self):

1992

1472

"""Iterate over all the entries in the dirstate.

1993

1473

2009

1489

return self._id_index

2010

1490

2011

1491

def _get_output_lines(self, lines):

2012

"""Format lines for final output.

1492

"""format lines for final output.

2013

1493

2014

:param lines: A sequence of lines containing the parents list and the

1494

:param lines: A sequece of lines containing the parents list and the

2015

1495

path lines.

2016

1496

"""

2017

1497

output_lines = [DirState.HEADER_FORMAT_3]

2025

1505

return output_lines

2026

1506

2027

1507

def _make_deleted_row(self, fileid_utf8, parents):

2028

"""Return a deleted row for fileid_utf8."""

1508

"""Return a deleted for for fileid_utf8."""

2029

1509

return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,

2030

1510

''), parents

2031

1511

2034

1514

return len(self._parents) - len(self._ghosts)

2035

1515

2036

1516

@staticmethod

2037

def on_file(path, sha1_provider=None):

2038

"""Construct a DirState on the file at path "path".

1517

def on_file(path):

1518

"""Construct a DirState on the file at path path.

2039

1519

2040

:param path: The path at which the dirstate file on disk should live.

2041

:param sha1_provider: an object meeting the SHA1Provider interface.

2042

If None, a DefaultSHA1Provider is used.

2043

1520

:return: An unlocked DirState object, associated with the given path.

2044

1521

"""

2045

if sha1_provider is None:

2046

sha1_provider = DefaultSHA1Provider()

2047

result = DirState(path, sha1_provider)

1522

result = DirState(path)

2048

1523

return result

2049

1524

2050

1525

def _read_dirblocks_if_needed(self):

2051

1526

"""Read in all the dirblocks from the file if they are not in memory.

2052

1527

2053

1528

This populates self._dirblocks, and sets self._dirblock_state to

2054

1529

IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block

2055

1530

2056

1531

"""

2057

1532

self._read_header_if_needed()

2058

1533

if self._dirblock_state == DirState.NOT_IN_MEMORY:

2059

_read_dirblocks(self)

1534

# move the _state_file pointer to after the header (in case bisect

1535

# has been called in the mean time)

1536

self._state_file.seek(self._end_of_header)

1537

text = self._state_file.read()

1538

# TODO: check the crc checksums. crc_measured = zlib.crc32(text)

1539

1540

fields = text.split('\0')

1541

# Remove the last blank entry

1542

trailing = fields.pop()

1543

assert trailing == ''

1544

# consider turning fields into a tuple.

1545

1546

# skip the first field which is the trailing null from the header.

1547

cur = 1

1548

# Each line now has an extra '\n' field which is not used

1549

# so we just skip over it

1550

# entry size:

1551

# 3 fields for the key

1552

# + number of fields per tree_data (5) * tree count

1553

# + newline

1554

num_present_parents = self._num_present_parents()

1555

tree_count = 1 + num_present_parents

1556

entry_size = self._fields_per_entry()

1557

expected_field_count = entry_size * self._num_entries

1558

field_count = len(fields)

1559

# this checks our adjustment, and also catches file too short.

1560

assert field_count - cur == expected_field_count, \

1561

'field count incorrect %s != %s, entry_size=%s, '\

1562

'num_entries=%s fields=%r' % (

1563

field_count - cur, expected_field_count, entry_size,

1564

self._num_entries, fields)

1565

1566

if num_present_parents == 1:

1567

# Bind external functions to local names

1568

_int = int

1569

# We access all fields in order, so we can just iterate over

1570

# them. Grab an straight iterator over the fields. (We use an

1571

# iterator because we don't want to do a lot of additions, nor

1572

# do we want to do a lot of slicing)

1573

next = iter(fields).next

1574

# Move the iterator to the current position

1575

for x in xrange(cur):

1576

next()

1577

# The two blocks here are deliberate: the root block and the

1578

# contents-of-root block.

1579

self._dirblocks = [('', []), ('', [])]

1580

current_block = self._dirblocks[0][1]

1581

current_dirname = ''

1582

append_entry = current_block.append

1583

for count in xrange(self._num_entries):

1584

dirname = next()

1585

name = next()

1586

file_id = next()

1587

if dirname != current_dirname:

1588

# new block - different dirname

1589

current_block = []

1590

current_dirname = dirname

1591

self._dirblocks.append((current_dirname, current_block))

1592

append_entry = current_block.append

1593

# we know current_dirname == dirname, so re-use it to avoid

1594

# creating new strings

1595

entry = ((current_dirname, name, file_id),

1596

[(# Current Tree

1597

next(), # minikind

1598

next(), # fingerprint

1599

_int(next()), # size

1600

next() == 'y', # executable

1601

next(), # packed_stat or revision_id

1602

1603

( # Parent 1

1604

next(), # minikind

1605

next(), # fingerprint

1606

_int(next()), # size

1607

next() == 'y', # executable

1608

next(), # packed_stat or revision_id

1609

1610

])

1611

trailing = next()

1612

assert trailing == '\n'

1613

# append the entry to the current block

1614

append_entry(entry)

1615

self._split_root_dirblock_into_contents()

1616

else:

1617

fields_to_entry = self._get_fields_to_entry()

1618

entries = [fields_to_entry(fields[pos:pos+entry_size])

1619

for pos in xrange(cur, field_count, entry_size)]

1620

self._entries_to_current_state(entries)

1621

# To convert from format 2 => format 3

1622

# self._dirblocks = sorted(self._dirblocks,

1623

# key=lambda blk:blk[0].split('/'))

1624

# To convert from format 3 => format 2

1625

# self._dirblocks = sorted(self._dirblocks)

1626

self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

2060

1627

2061

1628

def _read_header(self):

2062

1629

"""This reads in the metadata header, and the parent ids.

2070

1637

parent_line = self._state_file.readline()

2071

1638

info = parent_line.split('\0')

2072

1639

num_parents = int(info[0])

1640

assert num_parents == len(info)-2, 'incorrect parent info line'

2073

1641

self._parents = info[1:-1]

1642

2074

1643

ghost_line = self._state_file.readline()

2075

1644

info = ghost_line.split('\0')

2076

1645

num_ghosts = int(info[1])

1646

assert num_ghosts == len(info)-3, 'incorrect ghost info line'

2077

1647

self._ghosts = info[2:-1]

2078

1648

self._header_state = DirState.IN_MEMORY_UNMODIFIED

2079

1649

self._end_of_header = self._state_file.tell()

2087

1657

self._read_header()

2088

1658

2089

1659

def _read_prelude(self):

2090

"""Read in the prelude header of the dirstate file.

1660

"""Read in the prelude header of the dirstate file

2091

1661

2092

1662

This only reads in the stuff that is not connected to the crc

2093

1663

checksum. The position will be correct to read in the rest of

2096

1666

and their ids. Followed by a newline.

2097

1667

"""

2098

1668

header = self._state_file.readline()

2099

if header != DirState.HEADER_FORMAT_3:

2100

raise errors.BzrError(

2101

'invalid header line: %r' % (header,))

1669

assert header == DirState.HEADER_FORMAT_3, \

1670

'invalid header line: %r' % (header,)

2102

1671

crc_line = self._state_file.readline()

2103

if not crc_line.startswith('crc32: '):

2104

raise errors.BzrError('missing crc32 checksum: %r' % crc_line)

1672

assert crc_line.startswith('crc32: '), 'missing crc32 checksum'

2105

1673

self.crc_expected = int(crc_line[len('crc32: '):-1])

2106

1674

num_entries_line = self._state_file.readline()

2107

if not num_entries_line.startswith('num_entries: '):

2108

raise errors.BzrError('missing num_entries line')

1675

assert num_entries_line.startswith('num_entries: '), 'missing num_entries line'

2109

1676

self._num_entries = int(num_entries_line[len('num_entries: '):-1])

2110

1677

2111

def sha1_from_stat(self, path, stat_result, _pack_stat=pack_stat):

2112

"""Find a sha1 given a stat lookup."""

2113

return self._get_packed_stat_index().get(_pack_stat(stat_result), None)

2114

2115

def _get_packed_stat_index(self):

2116

"""Get a packed_stat index of self._dirblocks."""

2117

if self._packed_stat_index is None:

2118

index = {}

2119

for key, tree_details in self._iter_entries():

2120

if tree_details[0][0] == 'f':

2121

index[tree_details[0][4]] = tree_details[0][1]

2122

self._packed_stat_index = index

2123

return self._packed_stat_index

2124

2125

1678

def save(self):

2126

1679

"""Save any pending changes created during this session.

2127

1680

2128

1681

We reuse the existing file, because that prevents race conditions with

2129

1682

file creation, and use oslocks on it to prevent concurrent modification

2130

and reads - because dirstate's incremental data aggregation is not

1683

and reads - because dirstates incremental data aggretation is not

2131

1684

compatible with reading a modified file, and replacing a file in use by

2132

another process is impossible on Windows.

1685

another process is impossible on windows.

2133

1686

2134

1687

A dirstate in read only mode should be smart enough though to validate

2135

1688

that the file has not changed, and otherwise discard its cache and

2136

1689

start over, to allow for fine grained read lock duration, so 'status'

2137

1690

won't block 'commit' - for example.

2138

1691

"""

2139

if self._changes_aborted:

2140

# Should this be a warning? For now, I'm expecting that places that

2141

# mark it inconsistent will warn, making a warning here redundant.

2142

trace.mutter('Not saving DirState because '

2143

'_changes_aborted is set.')

2144

return

2145

1692

if (self._header_state == DirState.IN_MEMORY_MODIFIED or

2146

1693

self._dirblock_state == DirState.IN_MEMORY_MODIFIED):

2147

1694

2180

1727

2181

1728

:param parent_ids: A list of parent tree revision ids.

2182

1729

:param dirblocks: A list containing one tuple for each directory in the

2183

tree. Each tuple contains the directory path and a list of entries

1730

tree. Each tuple contains the directory path and a list of entries

2184

1731

found in that directory.

2185

1732

"""

2186

1733

# our memory copy is now authoritative.

2189

1736

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

2190

1737

self._parents = list(parent_ids)

2191

1738

self._id_index = None

2192

self._packed_stat_index = None

2193

1739

2194

1740

def set_path_id(self, path, new_id):

2195

1741

"""Change the id of path to new_id in the current working tree.

2199

1745

:param new_id: The new id to assign to the path. This must be a utf8

2200

1746

file id (not unicode, and not None).

2201

1747

"""

1748

assert new_id.__class__ == str, \

1749

"path_id %r is not a plain string" % (new_id,)

2202

1750

self._read_dirblocks_if_needed()

2203

1751

if len(path):

2204

# TODO: logic not written

1752

# logic not written

2205

1753

raise NotImplementedError(self.set_path_id)

2206

1754

# TODO: check new id is unique

2207

1755

entry = self._get_entry(0, path_utf8=path)

2220

1768

"""Set the parent trees for the dirstate.

2221

1769

2222

1770

:param trees: A list of revision_id, tree tuples. tree must be provided

2223

even if the revision_id refers to a ghost: supply an empty tree in

1771

even if the revision_id refers to a ghost: supply an empty tree in

2224

1772

this case.

2225

1773

:param ghosts: A list of the revision_ids that are ghosts at the time

2226

1774

of setting.

2227

"""

2228

# TODO: generate a list of parent indexes to preserve to save

1775

"""

1776

self._validate()

1777

# TODO: generate a list of parent indexes to preserve to save

2229

1778

# processing specific parent trees. In the common case one tree will

2230

1779

# be preserved - the left most parent.

2231

1780

# TODO: if the parent tree is a dirstate, we might want to walk them

2236

1785

# map and then walk the new parent trees only, mapping them into the

2237

1786

# dirstate. Walk the dirstate at the same time to remove unreferenced

2238

1787

# entries.

2239

# for now:

2240

# sketch: loop over all entries in the dirstate, cherry picking

1788

# for now:

1789

# sketch: loop over all entries in the dirstate, cherry picking

2241

1790

# entries from the parent trees, if they are not ghost trees.

2242

1791

# after we finish walking the dirstate, all entries not in the dirstate

2243

1792

# are deletes, so we want to append them to the end as per the design

2248

1797

# links. We don't trivially use the inventory from other trees

2249

1798

# because this leads to either double touching, or to accessing

2250

1799

# missing keys,

2251

# - find other keys containing a path

2252

# We accumulate each entry via this dictionary, including the root

1800

# - find other keys containing a path

1801

# We accumulate each entry via this dictionary, including the root

2253

1802

by_path = {}

2254

1803

id_index = {}

2255

1804

# we could do parallel iterators, but because file id data may be

2259

1808

# parent, but for now the common cases are adding a new parent (merge),

2260

1809

# and replacing completely (commit), and commit is more common: so

2261

1810

# optimise merge later.

2262

1811

2263

1812

# ---- start generation of full tree mapping data

2264

1813

# what trees should we use?

2265

1814

parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]

2266

# how many trees do we end up with

1815

# how many trees do we end up with

2267

1816

parent_count = len(parent_trees)

2268

1817

2269

1818

# one: the current tree

2270

1819

for entry in self._iter_entries():

2271

1820

# skip entries not in the current tree

2272

if entry[1][0][0] in 'ar': # absent, relocated

1821

if entry[1][0][0] in ('a', 'r'): # absent, relocated

2273

1822

continue

2274

1823

by_path[entry[0]] = [entry[1][0]] + \

2275

1824

[DirState.NULL_PARENT_DETAILS] * parent_count

2276

1825

id_index[entry[0][2]] = set([entry[0]])

2277

1826

2278

1827

# now the parent trees:

2279

1828

for tree_index, tree in enumerate(parent_trees):

2280

1829

# the index is off by one, adjust it.

2294

1843

# avoid checking all known paths for the id when generating a

2295

1844

# new entry at this path: by adding the id->path mapping last,

2296

1845

# all the mappings are valid and have correct relocation

2297

# records where needed.

1846

# records where needed.

2298

1847

file_id = entry.file_id

2299

1848

path_utf8 = path.encode('utf8')

2300

1849

dirname, basename = osutils.split(path_utf8)

2309

1858

# this file id is at a different path in one of the

2310

1859

# other trees, so put absent pointers there

2311

1860

# This is the vertical axis in the matrix, all pointing

2312

# to the real path.

1861

# tot he real path.

2313

1862

by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')

2314

# by path consistency: Insert into an existing path record (trivial), or

1863

# by path consistency: Insert into an existing path record (trivial), or

2315

1864

# add a new one with relocation pointers for the other tree indexes.

2316

1865

if new_entry_key in id_index[file_id]:

2317

1866

# there is already an entry where this data belongs, just insert it.

2330

1879

new_details.append(DirState.NULL_PARENT_DETAILS)

2331

1880

else:

2332

1881

# grab any one entry, use it to find the right path.

2333

# TODO: optimise this to reduce memory use in highly

1882

# TODO: optimise this to reduce memory use in highly

2334

1883

# fragmented situations by reusing the relocation

2335

1884

# records.

2336

1885

a_key = iter(id_index[file_id]).next()

2355

1904

self._header_state = DirState.IN_MEMORY_MODIFIED

2356

1905

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

2357

1906

self._id_index = id_index

1907

self._validate()

2358

1908

2359

1909

def _sort_entries(self, entry_list):

2360

1910

"""Given a list of entries, sort them into the right order.

2363

1913

try to keep everything in sorted blocks all the time, but sometimes

2364

1914

it's easier to sort after the fact.

2365

1915

"""

1916

# TODO: Might be faster to do a schwartzian transform?

2366

1917

def _key(entry):

2367

1918

# sort by: directory parts, file name, file id

2368

1919

return entry[0][0].split('/'), entry[0][1], entry[0][2]

2369

1920

return sorted(entry_list, key=_key)

2370

1921

2371

1922

def set_state_from_inventory(self, new_inv):

2372

"""Set new_inv as the current state.

1923

"""Set new_inv as the current state.

2373

1924

2374

1925

This API is called by tree transform, and will usually occur with

2375

1926

existing parent trees.

2376

1927

2377

1928

:param new_inv: The inventory object to set current state from.

2378

1929

"""

2379

if 'evil' in debug.debug_flags:

2380

trace.mutter_callsite(1,

2381

"set_state_from_inventory called; please mutate the tree instead")

2382

1930

self._read_dirblocks_if_needed()

2383

1931

# sketch:

2384

# Two iterators: current data and new data, both in dirblock order.

2385

# We zip them together, which tells about entries that are new in the

2386

# inventory, or removed in the inventory, or present in both and

2387

# possibly changed.

2388

2389

# You might think we could just synthesize a new dirstate directly

2390

# since we're processing it in the right order. However, we need to

2391

# also consider there may be any number of parent trees and relocation

2392

# pointers, and we don't want to duplicate that here.

1932

# incremental algorithm:

1933

# two iterators: current data and new data, both in dirblock order.

2393

1934

new_iterator = new_inv.iter_entries_by_dir()

2394

1935

# we will be modifying the dirstate, so we need a stable iterator. In

2395

1936

# future we might write one, for now we just clone the state into a

2396

# list - which is a shallow copy.

1937

# list - which is a shallow copy, so each

2397

1938

old_iterator = iter(list(self._iter_entries()))

2398

1939

# both must have roots so this is safe:

2399

1940

current_new = new_iterator.next()

2405

1946

return None

2406

1947

while current_new or current_old:

2407

1948

# skip entries in old that are not really there

2408

if current_old and current_old[1][0][0] in 'ar':

1949

if current_old and current_old[1][0][0] in ('r', 'a'):

2409

1950

# relocated or absent

2410

1951

current_old = advance(old_iterator)

2411

1952

continue

2418

1959

current_new_minikind = \

2419

1960

DirState._kind_to_minikind[current_new[1].kind]

2420

1961

if current_new_minikind == 't':

2421

fingerprint = current_new[1].reference_revision or ''

1962

fingerprint = current_new[1].reference_revision

2422

1963

else:

2423

# We normally only insert or remove records, or update

2424

# them when it has significantly changed. Then we want to

2425

# erase its fingerprint. Unaffected records should

2426

# normally not be updated at all.

2427

1964

fingerprint = ''

2428

1965

else:

2429

1966

# for safety disable variables

2430

new_path_utf8 = new_dirname = new_basename = new_id = \

2431

new_entry_key = None

1967

new_path_utf8 = new_dirname = new_basename = new_id = new_entry_key = None

2432

1968

# 5 cases, we don't have a value that is strictly greater than everything, so

2433

1969

# we make both end conditions explicit

2434

1970

if not current_old:

2443

1979

current_old = advance(old_iterator)

2444

1980

elif new_entry_key == current_old[0]:

2445

1981

# same - common case

2446

# We're looking at the same path and id in both the dirstate

2447

# and inventory, so just need to update the fields in the

2448

# dirstate from the one in the inventory.

2449

1982

# TODO: update the record if anything significant has changed.

2450

1983

# the minimal required trigger is if the execute bit or cached

2451

1984

# kind has changed.

2457

1990

# both sides are dealt with, move on

2458

1991

current_old = advance(old_iterator)

2459

1992

current_new = advance(new_iterator)

2460

elif (cmp_by_dirs(new_dirname, current_old[0][0]) < 0

2461

or (new_dirname == current_old[0][0]

2462

and new_entry_key[1:] < current_old[0][1:])):

1993

elif new_entry_key < current_old[0]:

2463

1994

# new comes before:

2464

1995

# add a entry for this and advance new

2465

1996

self.update_minimal(new_entry_key, current_new_minikind,

2467

1998

path_utf8=new_path_utf8, fingerprint=fingerprint)

2468

1999

current_new = advance(new_iterator)

2469

2000

else:

2470

# we've advanced past the place where the old key would be,

2471

# without seeing it in the new list. so it must be gone.

2001

# old comes before:

2472

2002

self._make_absent(current_old)

2473

2003

current_old = advance(old_iterator)

2474

2004

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

2475

2005

self._id_index = None

2476

self._packed_stat_index = None

2477

2006

2478

2007

def _make_absent(self, current_old):

2479

2008

"""Mark current_old - an entry - as absent for tree 0.

2480

2009

2481

:return: True if this was the last details entry for the entry key:

2010

:return: True if this was the last details entry for they entry key:

2482

2011

that is, if the underlying block has had the entry removed, thus

2483

2012

shrinking in length.

2484

2013

"""

2485

2014

# build up paths that this id will be left at after the change is made,

2486

2015

# so we can update their cross references in tree 0

2487

2016

all_remaining_keys = set()

2488

# Dont check the working tree, because it's going.

2017

# Dont check the working tree, because its going.

2489

2018

for details in current_old[1][1:]:

2490

if details[0] not in 'ar': # absent, relocated

2019

if details[0] not in ('a', 'r'): # absent, relocated

2491

2020

all_remaining_keys.add(current_old[0])

2492

2021

elif details[0] == 'r': # relocated

2493

2022

# record the key for the real path.

2500

2029

# Remove it, it's meaningless.

2501

2030

block = self._find_block(current_old[0])

2502

2031

entry_index, present = self._find_entry_index(current_old[0], block[1])

2503

if not present:

2504

raise AssertionError('could not find entry for %s' % (current_old,))

2032

assert present, 'could not find entry for %s' % (current_old,)

2505

2033

block[1].pop(entry_index)

2506

2034

# if we have an id_index in use, remove this key from it for this id.

2507

2035

if self._id_index is not None:

2508

2036

self._id_index[current_old[0][2]].remove(current_old[0])

2509

2037

# update all remaining keys for this id to record it as absent. The

2510

# existing details may either be the record we are marking as deleted

2038

# existing details may either be the record we are making as deleted

2511

2039

# (if there were other trees with the id present at this path), or may

2512

2040

# be relocations.

2513

2041

for update_key in all_remaining_keys:

2514

2042

update_block_index, present = \

2515

2043

self._find_block_index_from_key(update_key)

2516

if not present:

2517

raise AssertionError('could not find block for %s' % (update_key,))

2044

assert present, 'could not find block for %s' % (update_key,)

2518

2045

update_entry_index, present = \

2519

2046

self._find_entry_index(update_key, self._dirblocks[update_block_index][1])

2520

if not present:

2521

raise AssertionError('could not find entry for %s' % (update_key,))

2047

assert present, 'could not find entry for %s' % (update_key,)

2522

2048

update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]

2523

2049

# it must not be absent at the moment

2524

if update_tree_details[0][0] == 'a': # absent

2525

raise AssertionError('bad row %r' % (update_tree_details,))

2050

assert update_tree_details[0][0] != 'a' # absent

2526

2051

update_tree_details[0] = DirState.NULL_PARENT_DETAILS

2527

2052

self._dirblock_state = DirState.IN_MEMORY_MODIFIED

2528

2053

return last_reference

2539

2064

:param minikind: The type for the entry ('f' == 'file', 'd' ==

2540

2065

'directory'), etc.

2541

2066

:param executable: Should the executable bit be set?

2542

:param fingerprint: Simple fingerprint for new entry: canonical-form

2543

sha1 for files, referenced revision id for subtrees, etc.

2544

:param packed_stat: Packed stat value for new entry.

2067

:param fingerprint: Simple fingerprint for new entry.

2068

:param packed_stat: packed stat value for new entry.

2545

2069

:param size: Size information for new entry

2546

2070

:param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing

2547

2071

extra computation.

2548

2549

If packed_stat and fingerprint are not given, they're invalidated in

2550

the entry.

2551

2072

"""

2552

2073

block = self._find_block(key)[1]

2553

2074

if packed_stat is None:

2554

2075

packed_stat = DirState.NULLSTAT

2555

# XXX: Some callers pass '' as the packed_stat, and it seems to be

2556

# sometimes present in the dirstate - this seems oddly inconsistent.

2557

# mbp 20071008

2558

2076

entry_index, present = self._find_entry_index(key, block)

2559

2077

new_details = (minikind, fingerprint, size, executable, packed_stat)

2560

2078

id_index = self._get_id_index()

2576

2094

# the test for existing kinds is different: this can be

2577

2095

# factored out to a helper though.

2578

2096

other_block_index, present = self._find_block_index_from_key(other_key)

2579

if not present:

2580

raise AssertionError('could not find block for %s' % (other_key,))

2097

assert present, 'could not find block for %s' % (other_key,)

2581

2098

other_entry_index, present = self._find_entry_index(other_key,

2582

2099

self._dirblocks[other_block_index][1])

2583

if not present:

2584

raise AssertionError('could not find entry for %s' % (other_key,))

2585

if path_utf8 is None:

2586

raise AssertionError('no path')

2100

assert present, 'could not find entry for %s' % (other_key,)

2101

assert path_utf8 is not None

2587

2102

self._dirblocks[other_block_index][1][other_entry_index][1][0] = \

2588

2103

('r', path_utf8, 0, False, '')

2589

2104

2590

2105

num_present_parents = self._num_present_parents()

2591

2106

for lookup_index in xrange(1, num_present_parents + 1):

2592

2107

# grab any one entry, use it to find the right path.

2593

# TODO: optimise this to reduce memory use in highly

2108

# TODO: optimise this to reduce memory use in highly

2594

2109

# fragmented situations by reusing the relocation

2595

2110

# records.

2596

2111

update_block_index, present = \

2597

2112

self._find_block_index_from_key(other_key)

2598

if not present:

2599

raise AssertionError('could not find block for %s' % (other_key,))

2113

assert present, 'could not find block for %s' % (other_key,)

2600

2114

update_entry_index, present = \

2601

2115

self._find_entry_index(other_key, self._dirblocks[update_block_index][1])

2602

if not present:

2603

raise AssertionError('could not find entry for %s' % (other_key,))

2116

assert present, 'could not find entry for %s' % (other_key,)

2604

2117

update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]

2605

if update_details[0] in 'ar': # relocated, absent

2118

if update_details[0] in ('r', 'a'): # relocated, absent

2606

2119

# it's a pointer or absent in lookup_index's tree, use

2607

2120

# it as is.

2608

2121

new_entry[1].append(update_details)

2613

2126

block.insert(entry_index, new_entry)

2614

2127

existing_keys.add(key)

2615

2128

else:

2616

# Does the new state matter?

2129

# Does the new state matter?

2617

2130

block[entry_index][1][0] = new_details

2618

2131

# parents cannot be affected by what we do.

2619

# other occurrences of this id can be found

2132

# other occurrences of this id can be found

2620

2133

# from the id index.

2621

2134

# ---

2622

2135

# tree index consistency: All other paths for this id in this tree

2624

2137

# we may have passed entries in the state with this file id already

2625

2138

# that were absent - where parent entries are - and they need to be

2626

2139

# converted to relocated.

2627

if path_utf8 is None:

2628

raise AssertionError('no path')

2140

assert path_utf8 is not None

2629

2141

for entry_key in id_index.setdefault(key[2], set()):

2630

2142

# TODO:PROFILING: It might be faster to just update

2631

2143

# rather than checking if we need to, and then overwrite

2636

2148

# This is the vertical axis in the matrix, all pointing

2637

2149

# to the real path.

2638

2150

block_index, present = self._find_block_index_from_key(entry_key)

2639

if not present:

2640

raise AssertionError('not present: %r', entry_key)

2151

assert present

2641

2152

entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])

2642

if not present:

2643

raise AssertionError('not present: %r', entry_key)

2153

assert present

2644

2154

self._dirblocks[block_index][1][entry_index][1][0] = \

2645

2155

('r', path_utf8, 0, False, '')

2646

2156

# add a containing dirblock if needed.

2655

2165

def _validate(self):

2656

2166

"""Check that invariants on the dirblock are correct.

2657

2167

2658

This can be useful in debugging; it shouldn't be necessary in

2168

This can be useful in debugging; it shouldn't be necessary in

2659

2169

normal code.

2660

2170

2661

2171

This must be called with a lock held.

2677

2187

if not self._dirblocks[0][0] == '':

2678

2188

raise AssertionError(

2679

2189

"dirblocks don't start with root block:\n" + \

2680

pformat(self._dirblocks))

2190

pformat(dirblocks))

2681

2191

if len(self._dirblocks) > 1:

2682

2192

if not self._dirblocks[1][0] == '':

2683

2193

raise AssertionError(

2684

2194

"dirblocks missing root directory:\n" + \

2685

pformat(self._dirblocks))

2195

pformat(dirblocks))

2686

2196

# the dirblocks are sorted by their path components, name, and dir id

2687

2197

dir_names = [d[0].split('/')

2688

2198

for d in self._dirblocks[1:]]

2706

2216

"dirblock for %r is not sorted:\n%s" % \

2707

2217

(dirblock[0], pformat(dirblock)))

2708

2218

2709

def check_valid_parent():

2710

"""Check that the current entry has a valid parent.

2711

2712

This makes sure that the parent has a record,

2713

and that the parent isn't marked as "absent" in the

2714

current tree. (It is invalid to have a non-absent file in an absent

2715

directory.)

2716

"""

2717

if entry[0][0:2] == ('', ''):

2718

# There should be no parent for the root row

2719

return

2720

parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])

2721

if parent_entry == (None, None):

2722

raise AssertionError(

2723

"no parent entry for: %s in tree %s"

2724

% (this_path, tree_index))

2725

if parent_entry[1][tree_index][0] != 'd':

2726

raise AssertionError(

2727

"Parent entry for %s is not marked as a valid"

2728

" directory. %s" % (this_path, parent_entry,))

2729

2730

2219

# For each file id, for each tree: either

2731

2220

# the file id is not present at all; all rows with that id in the

2732

2221

# key have it marked as 'absent'

2733

# OR the file id is present under exactly one name; any other entries

2222

# OR the file id is present under exactly one name; any other entries

2734

2223

# that mention that id point to the correct name.

2735

2224

2736

2225

# We check this with a dict per tree pointing either to the present

2746

2235

"wrong number of entry details for row\n%s" \

2747

2236

",\nexpected %d" % \

2748

2237

(pformat(entry), tree_count))

2749

absent_positions = 0

2750

2238

for tree_index, tree_state in enumerate(entry[1]):

2751

2239

this_tree_map = id_path_maps[tree_index]

2752

2240

minikind = tree_state[0]

2753

if minikind in 'ar':

2754

absent_positions += 1

2755

2241

# have we seen this id before in this column?

2756

2242

if file_id in this_tree_map:

2757

previous_path, previous_loc = this_tree_map[file_id]

2243

previous_path = this_tree_map[file_id]

2758

2244

# any later mention of this file must be consistent with

2759

2245

# what was said before

2760

2246

if minikind == 'a':

2774

2260

# pointed to by a relocation, which must point here

2775

2261

if previous_path != this_path:

2776

2262

raise AssertionError(

2777

"entry %r inconsistent with previous path %r "

2778

"seen at %r" %

2779

(entry, previous_path, previous_loc))

2780

check_valid_parent()

2263

"entry %r inconsistent with previous path %r" % \

2264

(entry, previous_path))

2781

2265

else:

2782

2266

if minikind == 'a':

2783

2267

# absent; should not occur anywhere else

2784

this_tree_map[file_id] = None, this_path

2268

this_tree_map[file_id] = None

2785

2269

elif minikind == 'r':

2786

# relocation, must occur at expected location

2787

this_tree_map[file_id] = tree_state[1], this_path

2270

# relocation, must occur at expected location

2271

this_tree_map[file_id] = tree_state[1]

2788

2272

else:

2789

this_tree_map[file_id] = this_path, this_path

2790

check_valid_parent()

2791

if absent_positions == tree_count:

2792

raise AssertionError(

2793

"entry %r has no data for any tree." % (entry,))

2273

this_tree_map[file_id] = this_path

2794

2274

2795

2275

def _wipe_state(self):

2796

2276

"""Forget all state information about the dirstate."""

2797

2277

self._header_state = DirState.NOT_IN_MEMORY

2798

2278

self._dirblock_state = DirState.NOT_IN_MEMORY

2799

self._changes_aborted = False

2800

2279

self._parents = []

2801

2280

self._ghosts = []

2802

2281

self._dirblocks = []

2803

2282

self._id_index = None

2804

self._packed_stat_index = None

2805

2283

self._end_of_header = None

2806

2284

self._cutoff_time = None

2807

2285

self._split_path_cache = {}

2808

2286

2809

2287

def lock_read(self):

2810

"""Acquire a read lock on the dirstate."""

2288

"""Acquire a read lock on the dirstate"""

2811

2289

if self._lock_token is not None:

2812

2290

raise errors.LockContention(self._lock_token)

2813

2291

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2820

2298

self._wipe_state()

2821

2299

2822

2300

def lock_write(self):

2823

"""Acquire a write lock on the dirstate."""

2301

"""Acquire a write lock on the dirstate"""

2824

2302

if self._lock_token is not None:

2825

2303

raise errors.LockContention(self._lock_token)

2826

2304

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2833

2311

self._wipe_state()

2834

2312

2835

2313

def unlock(self):

2836

"""Drop any locks held on the dirstate."""

2314

"""Drop any locks held on the dirstate"""

2837

2315

if self._lock_token is None:

2838

2316

raise errors.LockNotHeld(self)

2839

2317

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2847

2325

self._split_path_cache = {}

2848

2326

2849

2327

def _requires_lock(self):

2850

"""Check that a lock is currently held by someone on the dirstate."""

2328

"""Checks that a lock is currently held by someone on the dirstate"""

2851

2329

if not self._lock_token:

2852

2330

raise errors.ObjectNotLocked(self)

2853

2331

2854

2332

2855

def py_update_entry(state, entry, abspath, stat_value,

2856

_stat_to_minikind=DirState._stat_to_minikind,

2857

_pack_stat=pack_stat):

2858

"""Update the entry based on what is actually on disk.

2859

2860

This function only calculates the sha if it needs to - if the entry is

2861

uncachable, or clearly different to the first parent's entry, no sha

2862

is calculated, and None is returned.

2863

2864

:param state: The dirstate this entry is in.

2865

:param entry: This is the dirblock entry for the file in question.

2866

:param abspath: The path on disk for this file.

2867

:param stat_value: The stat value done on the path.

2868

:return: None, or The sha1 hexdigest of the file (40 bytes) or link

2869

target of a symlink.

2333

def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):

2334

"""Return the index where to insert dirname into the dirblocks.

2335

2336

The return value idx is such that all directories blocks in dirblock[:idx]

2337

have names < dirname, and all blocks in dirblock[idx:] have names >=

2338

dirname.

2339

2340

Optional args lo (default 0) and hi (default len(dirblocks)) bound the

2341

slice of a to be searched.

2870

2342

"""

2343

if hi is None:

2344

hi = len(dirblocks)

2871

2345

try:

2872

minikind = _stat_to_minikind[stat_value.st_mode & 0170000]

2346

dirname_split = cache[dirname]

2873

2347

except KeyError:

2874

# Unhandled kind

2875

return None

2876

packed_stat = _pack_stat(stat_value)

2877

(saved_minikind, saved_link_or_sha1, saved_file_size,

2878

saved_executable, saved_packed_stat) = entry[1][0]

2879

2880

if minikind == 'd' and saved_minikind == 't':

2881

minikind = 't'

2882

if (minikind == saved_minikind

2883

and packed_stat == saved_packed_stat):

2884

# The stat hasn't changed since we saved, so we can re-use the

2885

# saved sha hash.

2886

if minikind == 'd':

2887

return None

2888

2889

# size should also be in packed_stat

2890

if saved_file_size == stat_value.st_size:

2891

return saved_link_or_sha1

2892

2893

# If we have gotten this far, that means that we need to actually

2894

# process this entry.

2895

link_or_sha1 = None

2896

if minikind == 'f':

2897

executable = state._is_executable(stat_value.st_mode,

2898

saved_executable)

2899

if state._cutoff_time is None:

2900

state._sha_cutoff_time()

2901

if (stat_value.st_mtime < state._cutoff_time

2902

and stat_value.st_ctime < state._cutoff_time

2903

and len(entry[1]) > 1

2904

and entry[1][1][0] != 'a'):

2905

# Could check for size changes for further optimised

2906

# avoidance of sha1's. However the most prominent case of

2907

# over-shaing is during initial add, which this catches.

2908

# Besides, if content filtering happens, size and sha

2909

# are calculated at the same time, so checking just the size

2910

# gains nothing w.r.t. performance.

2911

link_or_sha1 = state._sha1_file(abspath)

2912

entry[1][0] = ('f', link_or_sha1, stat_value.st_size,

2913

executable, packed_stat)

2914

else:

2915

entry[1][0] = ('f', '', stat_value.st_size,

2916

executable, DirState.NULLSTAT)

2917

elif minikind == 'd':

2918

link_or_sha1 = None

2919

entry[1][0] = ('d', '', 0, False, packed_stat)

2920

if saved_minikind != 'd':

2921

# This changed from something into a directory. Make sure we

2922

# have a directory block for it. This doesn't happen very

2923

# often, so this doesn't have to be super fast.

2924

block_index, entry_index, dir_present, file_present = \

2925

state._get_block_entry_index(entry[0][0], entry[0][1], 0)

2926

state._ensure_block(block_index, entry_index,

2927

osutils.pathjoin(entry[0][0], entry[0][1]))

2928

elif minikind == 'l':

2929

link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)

2930

if state._cutoff_time is None:

2931

state._sha_cutoff_time()

2932

if (stat_value.st_mtime < state._cutoff_time

2933

and stat_value.st_ctime < state._cutoff_time):

2934

entry[1][0] = ('l', link_or_sha1, stat_value.st_size,

2935

False, packed_stat)

2936

else:

2937

entry[1][0] = ('l', '', stat_value.st_size,

2938

False, DirState.NULLSTAT)

2939

state._dirblock_state = DirState.IN_MEMORY_MODIFIED

2940

return link_or_sha1

2941

update_entry = py_update_entry

2942

2943

2944

class ProcessEntryPython(object):

2945

2946

__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",

2947

"last_source_parent", "last_target_parent", "include_unchanged",

2948

"use_filesystem_for_exec", "utf8_decode", "searched_specific_files",

2949

"search_specific_files", "state", "source_index", "target_index",

2950

"want_unversioned", "tree"]

2951

2952

def __init__(self, include_unchanged, use_filesystem_for_exec,

2953

search_specific_files, state, source_index, target_index,

2954

want_unversioned, tree):

2955

self.old_dirname_to_file_id = {}

2956

self.new_dirname_to_file_id = {}

2957

# Just a sentry, so that _process_entry can say that this

2958

# record is handled, but isn't interesting to process (unchanged)

2959

self.uninteresting = object()

2960

# Using a list so that we can access the values and change them in

2961

# nested scope. Each one is [path, file_id, entry]

2962

self.last_source_parent = [None, None]

2963

self.last_target_parent = [None, None]

2964

self.include_unchanged = include_unchanged

2965

self.use_filesystem_for_exec = use_filesystem_for_exec

2966

self.utf8_decode = cache_utf8._utf8_decode

2967

# for all search_indexs in each path at or under each element of

2968

# search_specific_files, if the detail is relocated: add the id, and add the

2969

# relocated path as one to search if its not searched already. If the

2970

# detail is not relocated, add the id.

2971

self.searched_specific_files = set()

2972

self.search_specific_files = search_specific_files

2973

self.state = state

2974

self.source_index = source_index

2975

self.target_index = target_index

2976

self.want_unversioned = want_unversioned

2977

self.tree = tree

2978

2979

def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):

2980

"""Compare an entry and real disk to generate delta information.

2981

2982

:param path_info: top_relpath, basename, kind, lstat, abspath for

2983

the path of entry. If None, then the path is considered absent.

2984

(Perhaps we should pass in a concrete entry for this ?)

2985

Basename is returned as a utf8 string because we expect this

2986

tuple will be ignored, and don't want to take the time to

2987

decode.

2988

:return: None if these don't match

2989

A tuple of information about the change, or

2990

the object 'uninteresting' if these match, but are

2991

basically identical.

2992

"""

2993

if self.source_index is None:

2994

source_details = DirState.NULL_PARENT_DETAILS

2995

else:

2996

source_details = entry[1][self.source_index]

2997

target_details = entry[1][self.target_index]

2998

target_minikind = target_details[0]

2999

if path_info is not None and target_minikind in 'fdlt':

3000

if not (self.target_index == 0):

3001

raise AssertionError()

3002

link_or_sha1 = update_entry(self.state, entry,

3003

abspath=path_info[4], stat_value=path_info[3])

3004

# The entry may have been modified by update_entry

3005

target_details = entry[1][self.target_index]

3006

target_minikind = target_details[0]

3007

else:

3008

link_or_sha1 = None

3009

file_id = entry[0][2]

3010

source_minikind = source_details[0]

3011

if source_minikind in 'fdltr' and target_minikind in 'fdlt':

3012

# claimed content in both: diff

3013

# r | fdlt | | add source to search, add id path move and perform

3014

# | | | diff check on source-target

3015

# r | fdlt | a | dangling file that was present in the basis.

3016

# | | | ???

3017

if source_minikind in 'r':

3018

# add the source to the search path to find any children it

3019

# has. TODO ? : only add if it is a container ?

3020

if not osutils.is_inside_any(self.searched_specific_files,

3021

source_details[1]):

3022

self.search_specific_files.add(source_details[1])

3023

# generate the old path; this is needed for stating later

3024

# as well.

3025

old_path = source_details[1]

3026

old_dirname, old_basename = os.path.split(old_path)

3027

path = pathjoin(entry[0][0], entry[0][1])

3028

old_entry = self.state._get_entry(self.source_index,

3029

path_utf8=old_path)

3030

# update the source details variable to be the real

3031

# location.

3032

if old_entry == (None, None):

3033

raise errors.CorruptDirstate(self.state._filename,

3034

"entry '%s/%s' is considered renamed from %r"

3035

" but source does not exist\n"

3036

"entry: %s" % (entry[0][0], entry[0][1], old_path, entry))

3037

source_details = old_entry[1][self.source_index]

3038

source_minikind = source_details[0]

3039

else:

3040

old_dirname = entry[0][0]

3041

old_basename = entry[0][1]

3042

old_path = path = None

3043

if path_info is None:

3044

# the file is missing on disk, show as removed.

3045

content_change = True

3046

target_kind = None

3047

target_exec = False

3048

else:

3049

# source and target are both versioned and disk file is present.

3050

target_kind = path_info[2]

3051

if target_kind == 'directory':

3052

if path is None:

3053

old_path = path = pathjoin(old_dirname, old_basename)

3054

self.new_dirname_to_file_id[path] = file_id

3055

if source_minikind != 'd':

3056

content_change = True

3057

else:

3058

# directories have no fingerprint

3059

content_change = False

3060

target_exec = False

3061

elif target_kind == 'file':

3062

if source_minikind != 'f':

3063

content_change = True

3064

else:

3065

# If the size is the same, check the sha:

3066

if target_details[2] == source_details[2]:

3067

if link_or_sha1 is None:

3068

# Stat cache miss:

3069

statvalue, link_or_sha1 = \

3070

self.state._sha1_provider.stat_and_sha1(

3071

path_info[4])

3072

self.state._observed_sha1(entry, link_or_sha1,

3073

statvalue)

3074

content_change = (link_or_sha1 != source_details[1])

3075

else:

3076

# Size changed, so must be different

3077

content_change = True

3078

# Target details is updated at update_entry time

3079

if self.use_filesystem_for_exec:

3080

# We don't need S_ISREG here, because we are sure

3081

# we are dealing with a file.

3082

target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)

3083

else:

3084

target_exec = target_details[3]

3085

elif target_kind == 'symlink':

3086

if source_minikind != 'l':

3087

content_change = True

3088

else:

3089

content_change = (link_or_sha1 != source_details[1])

3090

target_exec = False

3091

elif target_kind == 'tree-reference':

3092

if source_minikind != 't':

3093

content_change = True

3094

else:

3095

content_change = False

3096

target_exec = False

3097

else:

3098

raise Exception, "unknown kind %s" % path_info[2]

3099

if source_minikind == 'd':

3100

if path is None:

3101

old_path = path = pathjoin(old_dirname, old_basename)

3102

self.old_dirname_to_file_id[old_path] = file_id

3103

# parent id is the entry for the path in the target tree

3104

if old_dirname == self.last_source_parent[0]:

3105

source_parent_id = self.last_source_parent[1]

3106

else:

3107

try:

3108

source_parent_id = self.old_dirname_to_file_id[old_dirname]

3109

except KeyError:

3110

source_parent_entry = self.state._get_entry(self.source_index,

3111

path_utf8=old_dirname)

3112

source_parent_id = source_parent_entry[0][2]

3113

if source_parent_id == entry[0][2]:

3114

# This is the root, so the parent is None

3115

source_parent_id = None

3116

else:

3117

self.last_source_parent[0] = old_dirname

3118

self.last_source_parent[1] = source_parent_id

3119

new_dirname = entry[0][0]

3120

if new_dirname == self.last_target_parent[0]:

3121

target_parent_id = self.last_target_parent[1]

3122

else:

3123

try:

3124

target_parent_id = self.new_dirname_to_file_id[new_dirname]

3125

except KeyError:

3126

# TODO: We don't always need to do the lookup, because the

3127

# parent entry will be the same as the source entry.

3128

target_parent_entry = self.state._get_entry(self.target_index,

3129

path_utf8=new_dirname)

3130

if target_parent_entry == (None, None):

3131

raise AssertionError(

3132

"Could not find target parent in wt: %s\nparent of: %s"

3133

% (new_dirname, entry))

3134

target_parent_id = target_parent_entry[0][2]

3135

if target_parent_id == entry[0][2]:

3136

# This is the root, so the parent is None

3137

target_parent_id = None

3138

else:

3139

self.last_target_parent[0] = new_dirname

3140

self.last_target_parent[1] = target_parent_id

3141

3142

source_exec = source_details[3]

3143

if (self.include_unchanged

3144

or content_change

3145

or source_parent_id != target_parent_id

3146

or old_basename != entry[0][1]

3147

or source_exec != target_exec

3148

3149

if old_path is None:

3150

old_path = path = pathjoin(old_dirname, old_basename)

3151

old_path_u = self.utf8_decode(old_path)[0]

3152

path_u = old_path_u

3153

else:

3154

old_path_u = self.utf8_decode(old_path)[0]

3155

if old_path == path:

3156

path_u = old_path_u

3157

else:

3158

path_u = self.utf8_decode(path)[0]

3159

source_kind = DirState._minikind_to_kind[source_minikind]

3160

return (entry[0][2],

3161

(old_path_u, path_u),

3162

content_change,

3163

(True, True),

3164

(source_parent_id, target_parent_id),

3165

(self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),

3166

(source_kind, target_kind),

3167

(source_exec, target_exec))

3168

else:

3169

return self.uninteresting

3170

elif source_minikind in 'a' and target_minikind in 'fdlt':

3171

# looks like a new file

3172

path = pathjoin(entry[0][0], entry[0][1])

3173

# parent id is the entry for the path in the target tree

3174

# TODO: these are the same for an entire directory: cache em.

3175

parent_id = self.state._get_entry(self.target_index,

3176

path_utf8=entry[0][0])[0][2]

3177

if parent_id == entry[0][2]:

3178

parent_id = None

3179

if path_info is not None:

3180

# Present on disk:

3181

if self.use_filesystem_for_exec:

3182

# We need S_ISREG here, because we aren't sure if this

3183

# is a file or not.

3184

target_exec = bool(

3185

stat.S_ISREG(path_info[3].st_mode)

3186

and stat.S_IEXEC & path_info[3].st_mode)

3187

else:

3188

target_exec = target_details[3]

3189

return (entry[0][2],

3190

(None, self.utf8_decode(path)[0]),

3191

True,

3192

(False, True),

3193

(None, parent_id),

3194

(None, self.utf8_decode(entry[0][1])[0]),

3195

(None, path_info[2]),

3196

(None, target_exec))

3197

else:

3198

# Its a missing file, report it as such.

3199

return (entry[0][2],

3200

(None, self.utf8_decode(path)[0]),

3201

False,

3202

(False, True),

3203

(None, parent_id),

3204

(None, self.utf8_decode(entry[0][1])[0]),

3205

(None, None),

3206

(None, False))

3207

elif source_minikind in 'fdlt' and target_minikind in 'a':

3208

# unversioned, possibly, or possibly not deleted: we dont care.

3209

# if its still on disk, *and* theres no other entry at this

3210

# path [we dont know this in this routine at the moment -

3211

# perhaps we should change this - then it would be an unknown.

3212

old_path = pathjoin(entry[0][0], entry[0][1])

3213

# parent id is the entry for the path in the target tree

3214

parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]

3215

if parent_id == entry[0][2]:

3216

parent_id = None

3217

return (entry[0][2],

3218

(self.utf8_decode(old_path)[0], None),

3219

True,

3220

(True, False),

3221

(parent_id, None),

3222

(self.utf8_decode(entry[0][1])[0], None),

3223

(DirState._minikind_to_kind[source_minikind], None),

3224

(source_details[3], None))

3225

elif source_minikind in 'fdlt' and target_minikind in 'r':

3226

# a rename; could be a true rename, or a rename inherited from

3227

# a renamed parent. TODO: handle this efficiently. Its not

3228

# common case to rename dirs though, so a correct but slow

3229

# implementation will do.

3230

if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):

3231

self.search_specific_files.add(target_details[1])

3232

elif source_minikind in 'ra' and target_minikind in 'ra':

3233

# neither of the selected trees contain this file,

3234

# so skip over it. This is not currently directly tested, but

3235

# is indirectly via test_too_much.TestCommands.test_conflicts.

3236

pass

3237

else:

3238

raise AssertionError("don't know how to compare "

3239

"source_minikind=%r, target_minikind=%r"

3240

% (source_minikind, target_minikind))

3241

## import pdb;pdb.set_trace()

3242

return None

3243

3244

def __iter__(self):

3245

return self

3246

3247

def iter_changes(self):

3248

"""Iterate over the changes."""

3249

utf8_decode = cache_utf8._utf8_decode

3250

_cmp_by_dirs = cmp_by_dirs

3251

_process_entry = self._process_entry

3252

uninteresting = self.uninteresting

3253

search_specific_files = self.search_specific_files

3254

searched_specific_files = self.searched_specific_files

3255

splitpath = osutils.splitpath

3256

# sketch:

3257

# compare source_index and target_index at or under each element of search_specific_files.

3258

# follow the following comparison table. Note that we only want to do diff operations when

3259

# the target is fdl because thats when the walkdirs logic will have exposed the pathinfo

3260

# for the target.

3261

# cases:

3262

3263

# Source | Target | disk | action

3264

# r | fdlt | | add source to search, add id path move and perform

3265

# | | | diff check on source-target

3266

# r | fdlt | a | dangling file that was present in the basis.

3267

# | | | ???

3268

# r | a | | add source to search

3269

# r | a | a |

3270

# r | r | | this path is present in a non-examined tree, skip.

3271

# r | r | a | this path is present in a non-examined tree, skip.

3272

# a | fdlt | | add new id

3273

# a | fdlt | a | dangling locally added file, skip

3274

# a | a | | not present in either tree, skip

3275

# a | a | a | not present in any tree, skip

3276

# a | r | | not present in either tree at this path, skip as it

3277

# | | | may not be selected by the users list of paths.

3278

# a | r | a | not present in either tree at this path, skip as it

3279

# | | | may not be selected by the users list of paths.

3280

# fdlt | fdlt | | content in both: diff them

3281

# fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ?

3282

# fdlt | a | | unversioned: output deleted id for now

3283

# fdlt | a | a | unversioned and deleted: output deleted id

3284

# fdlt | r | | relocated in this tree, so add target to search.

3285

# | | | Dont diff, we will see an r,fd; pair when we reach

3286

# | | | this id at the other path.

3287

# fdlt | r | a | relocated in this tree, so add target to search.

3288

# | | | Dont diff, we will see an r,fd; pair when we reach

3289

# | | | this id at the other path.

3290

3291

# TODO: jam 20070516 - Avoid the _get_entry lookup overhead by

3292

# keeping a cache of directories that we have seen.

3293

3294

while search_specific_files:

3295

# TODO: the pending list should be lexically sorted? the

3296

# interface doesn't require it.

3297

current_root = search_specific_files.pop()

3298

current_root_unicode = current_root.decode('utf8')

3299

searched_specific_files.add(current_root)

3300

# process the entries for this containing directory: the rest will be

3301

# found by their parents recursively.

3302

root_entries = self.state._entries_for_path(current_root)

3303

root_abspath = self.tree.abspath(current_root_unicode)

3304

try:

3305

root_stat = os.lstat(root_abspath)

3306

except OSError, e:

3307

if e.errno == errno.ENOENT:

3308

# the path does not exist: let _process_entry know that.

3309

root_dir_info = None

3310

else:

3311

# some other random error: hand it up.

3312

raise

3313

else:

3314

root_dir_info = ('', current_root,

3315

osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,

3316

root_abspath)

3317

if root_dir_info[2] == 'directory':

3318

if self.tree._directory_is_tree_reference(

3319

current_root.decode('utf8')):

3320

root_dir_info = root_dir_info[:2] + \

3321

('tree-reference',) + root_dir_info[3:]

3322

3323

if not root_entries and not root_dir_info:

3324

# this specified path is not present at all, skip it.

3325

continue

3326

path_handled = False

3327

for entry in root_entries:

3328

result = _process_entry(entry, root_dir_info)

3329

if result is not None:

3330

path_handled = True

3331

if result is not uninteresting:

3332

yield result

3333

if self.want_unversioned and not path_handled and root_dir_info:

3334

new_executable = bool(

3335

stat.S_ISREG(root_dir_info[3].st_mode)

3336

and stat.S_IEXEC & root_dir_info[3].st_mode)

3337

yield (None,

3338

(None, current_root_unicode),

3339

True,

3340

(False, False),

3341

(None, None),

3342

(None, splitpath(current_root_unicode)[-1]),

3343

(None, root_dir_info[2]),

3344

(None, new_executable)

3345

)

3346

initial_key = (current_root, '', '')

3347

block_index, _ = self.state._find_block_index_from_key(initial_key)

3348

if block_index == 0:

3349

# we have processed the total root already, but because the

3350

# initial key matched it we should skip it here.

3351

block_index +=1

3352

if root_dir_info and root_dir_info[2] == 'tree-reference':

3353

current_dir_info = None

3354

else:

3355

dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)

3356

try:

3357

current_dir_info = dir_iterator.next()

3358

except OSError, e:

3359

# on win32, python2.4 has e.errno == ERROR_DIRECTORY, but

3360

# python 2.5 has e.errno == EINVAL,

3361

# and e.winerror == ERROR_DIRECTORY

3362

e_winerror = getattr(e, 'winerror', None)

3363

win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)

3364

# there may be directories in the inventory even though

3365

# this path is not a file on disk: so mark it as end of

3366

# iterator

3367

if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):

3368

current_dir_info = None

3369

elif (sys.platform == 'win32'

3370

and (e.errno in win_errors

3371

or e_winerror in win_errors)):

3372

current_dir_info = None

3373

else:

3374

raise

3375

else:

3376

if current_dir_info[0][0] == '':

3377

# remove .bzr from iteration

3378

bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))

3379

if current_dir_info[1][bzr_index][0] != '.bzr':

3380

raise AssertionError()

3381

del current_dir_info[1][bzr_index]

3382

# walk until both the directory listing and the versioned metadata

3383

# are exhausted.

3384

if (block_index < len(self.state._dirblocks) and

3385

osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):

3386

current_block = self.state._dirblocks[block_index]

3387

else:

3388

current_block = None

3389

while (current_dir_info is not None or

3390

current_block is not None):

3391

if (current_dir_info and current_block

3392

and current_dir_info[0][0] != current_block[0]):

3393

if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:

3394

# filesystem data refers to paths not covered by the dirblock.

3395

# this has two possibilities:

3396

# A) it is versioned but empty, so there is no block for it

3397

# B) it is not versioned.

3398

3399

# if (A) then we need to recurse into it to check for

3400

# new unknown files or directories.

3401

# if (B) then we should ignore it, because we don't

3402

# recurse into unknown directories.

3403

path_index = 0

3404

while path_index < len(current_dir_info[1]):

3405

current_path_info = current_dir_info[1][path_index]

3406

if self.want_unversioned:

3407

if current_path_info[2] == 'directory':

3408

if self.tree._directory_is_tree_reference(

3409

current_path_info[0].decode('utf8')):

3410

current_path_info = current_path_info[:2] + \

3411

('tree-reference',) + current_path_info[3:]

3412

new_executable = bool(

3413

stat.S_ISREG(current_path_info[3].st_mode)

3414

and stat.S_IEXEC & current_path_info[3].st_mode)

3415

yield (None,

3416

(None, utf8_decode(current_path_info[0])[0]),

3417

True,

3418

(False, False),

3419

(None, None),

3420

(None, utf8_decode(current_path_info[1])[0]),

3421

(None, current_path_info[2]),

3422

(None, new_executable))

3423

# dont descend into this unversioned path if it is

3424

# a dir

3425

if current_path_info[2] in ('directory',

3426

'tree-reference'):

3427

del current_dir_info[1][path_index]

3428

path_index -= 1

3429

path_index += 1

3430

3431

# This dir info has been handled, go to the next

3432

try:

3433

current_dir_info = dir_iterator.next()

3434

except StopIteration:

3435

current_dir_info = None

3436

else:

3437

# We have a dirblock entry for this location, but there

3438

# is no filesystem path for this. This is most likely

3439

# because a directory was removed from the disk.

3440

# We don't have to report the missing directory,

3441

# because that should have already been handled, but we

3442

# need to handle all of the files that are contained

3443

# within.

3444

for current_entry in current_block[1]:

3445

# entry referring to file not present on disk.

3446

# advance the entry only, after processing.

3447

result = _process_entry(current_entry, None)

3448

if result is not None:

3449

if result is not uninteresting:

3450

yield result

3451

block_index +=1

3452

if (block_index < len(self.state._dirblocks) and

3453

osutils.is_inside(current_root,

3454

self.state._dirblocks[block_index][0])):

3455

current_block = self.state._dirblocks[block_index]

3456

else:

3457

current_block = None

3458

continue

3459

entry_index = 0

3460

if current_block and entry_index < len(current_block[1]):

3461

current_entry = current_block[1][entry_index]

3462

else:

3463

current_entry = None

3464

advance_entry = True

3465

path_index = 0

3466

if current_dir_info and path_index < len(current_dir_info[1]):

3467

current_path_info = current_dir_info[1][path_index]

3468

if current_path_info[2] == 'directory':

3469

if self.tree._directory_is_tree_reference(

3470

current_path_info[0].decode('utf8')):

3471

current_path_info = current_path_info[:2] + \

3472

('tree-reference',) + current_path_info[3:]

3473

else:

3474

current_path_info = None

3475

advance_path = True

3476

path_handled = False

3477

while (current_entry is not None or

3478

current_path_info is not None):

3479

if current_entry is None:

3480

# the check for path_handled when the path is advanced

3481

# will yield this path if needed.

3482

pass

3483

elif current_path_info is None:

3484

# no path is fine: the per entry code will handle it.

3485

result = _process_entry(current_entry, current_path_info)

3486

if result is not None:

3487

if result is not uninteresting:

3488

yield result

3489

elif (current_entry[0][1] != current_path_info[1]

3490

or current_entry[1][self.target_index][0] in 'ar'):

3491

# The current path on disk doesn't match the dirblock

3492

# record. Either the dirblock is marked as absent, or

3493

# the file on disk is not present at all in the

3494

# dirblock. Either way, report about the dirblock

3495

# entry, and let other code handle the filesystem one.

3496

3497

# Compare the basename for these files to determine

3498

# which comes first

3499

if current_path_info[1] < current_entry[0][1]:

3500

# extra file on disk: pass for now, but only

3501

# increment the path, not the entry

3502

advance_entry = False

3503

else:

3504

# entry referring to file not present on disk.

3505

# advance the entry only, after processing.

3506

result = _process_entry(current_entry, None)

3507

if result is not None:

3508

if result is not uninteresting:

3509

yield result

3510

advance_path = False

3511

else:

3512

result = _process_entry(current_entry, current_path_info)

3513

if result is not None:

3514

path_handled = True

3515

if result is not uninteresting:

3516

yield result

3517

if advance_entry and current_entry is not None:

3518

entry_index += 1

3519

if entry_index < len(current_block[1]):

3520

current_entry = current_block[1][entry_index]

3521

else:

3522

current_entry = None

3523

else:

3524

advance_entry = True # reset the advance flaga

3525

if advance_path and current_path_info is not None:

3526

if not path_handled:

3527

# unversioned in all regards

3528

if self.want_unversioned:

3529

new_executable = bool(

3530

stat.S_ISREG(current_path_info[3].st_mode)

3531

and stat.S_IEXEC & current_path_info[3].st_mode)

3532

try:

3533

relpath_unicode = utf8_decode(current_path_info[0])[0]

3534

except UnicodeDecodeError:

3535

raise errors.BadFilenameEncoding(

3536

current_path_info[0], osutils._fs_enc)

3537

yield (None,

3538

(None, relpath_unicode),

3539

True,

3540

(False, False),

3541

(None, None),

3542

(None, utf8_decode(current_path_info[1])[0]),

3543

(None, current_path_info[2]),

3544

(None, new_executable))

3545

# dont descend into this unversioned path if it is

3546

# a dir

3547

if current_path_info[2] in ('directory'):

3548

del current_dir_info[1][path_index]

3549

path_index -= 1

3550

# dont descend the disk iterator into any tree

3551

# paths.

3552

if current_path_info[2] == 'tree-reference':

3553

del current_dir_info[1][path_index]

3554

path_index -= 1

3555

path_index += 1

3556

if path_index < len(current_dir_info[1]):

3557

current_path_info = current_dir_info[1][path_index]

3558

if current_path_info[2] == 'directory':

3559

if self.tree._directory_is_tree_reference(

3560

current_path_info[0].decode('utf8')):

3561

current_path_info = current_path_info[:2] + \

3562

('tree-reference',) + current_path_info[3:]

3563

else:

3564

current_path_info = None

3565

path_handled = False

3566

else:

3567

advance_path = True # reset the advance flagg.

3568

if current_block is not None:

3569

block_index += 1

3570

if (block_index < len(self.state._dirblocks) and

3571

osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):

3572

current_block = self.state._dirblocks[block_index]

3573

else:

3574

current_block = None

3575

if current_dir_info is not None:

3576

try:

3577

current_dir_info = dir_iterator.next()

3578

except StopIteration:

3579

current_dir_info = None

3580

_process_entry = ProcessEntryPython

3581

3582

3583

# Try to load the compiled form if possible

3584

try:

3585

from bzrlib._dirstate_helpers_c import (

3586

_read_dirblocks_c as _read_dirblocks,

3587

bisect_dirblock_c as bisect_dirblock,

3588

_bisect_path_left_c as _bisect_path_left,

3589

_bisect_path_right_c as _bisect_path_right,

3590

cmp_by_dirs_c as cmp_by_dirs,

3591

ProcessEntryC as _process_entry,

3592

update_entry as update_entry,

3593

)

3594

except ImportError:

3595

from bzrlib._dirstate_helpers_py import (

3596

_read_dirblocks_py as _read_dirblocks,

3597

bisect_dirblock_py as bisect_dirblock,

3598

_bisect_path_left_py as _bisect_path_left,

3599

_bisect_path_right_py as _bisect_path_right,

3600

cmp_by_dirs_py as cmp_by_dirs,

3601

)

2348

dirname_split = dirname.split('/')

2349

cache[dirname] = dirname_split

2350

while lo < hi:

2351

mid = (lo+hi)//2

2352

# Grab the dirname for the current dirblock

2353

cur = dirblocks[mid][0]

2354

try:

2355

cur_split = cache[cur]

2356

except KeyError:

2357

cur_split = cur.split('/')

2358

cache[cur] = cur_split

2359

if cur_split < dirname_split: lo = mid+1

2360

else: hi = mid

2361

return lo

2362

2363

2364

2365

def pack_stat(st, _encode=base64.encodestring, _pack=struct.pack):

2366

"""Convert stat values into a packed representation."""

2367

# jam 20060614 it isn't really worth removing more entries if we

2368

# are going to leave it in packed form.

2369

# With only st_mtime and st_mode filesize is 5.5M and read time is 275ms

2370

# With all entries filesize is 5.9M and read time is mabye 280ms

2371

# well within the noise margin

2372

2373

# base64.encode always adds a final newline, so strip it off

2374

return _encode(_pack('>LLLLLL'

2375

, st.st_size, int(st.st_mtime), int(st.st_ctime)

2376

, st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]

Older »