# Copyright (C) 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""DirState objects record the state of a directory and its bzr metadata.

Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and
lines by NL. The field delimiters are omitted in the grammar, line delimiters
are not - this is done for clarity of reading. All string data is in utf8.

MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
WHOLE_NUMBER = {digit}, digit;
REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,
 ghost_details, entries;
header line = "#bazaar dirstate flat format 3", NL;
full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
row count = "num_entries: ", WHOLE_NUMBER, NL;
parent_details = WHOLE NUMBER, {REVISION_ID}*, NL;
ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entry = entry_key, current_entry_details, {parent_entry_details};
entry_key = dirname, basename, fileid;
current_entry_details = common_entry_details, working_entry_details;
parent_entry_details = common_entry_details, history_entry_details;
common_entry_details = MINIKIND, fingerprint, size, executable;
working_entry_details = packed_stat;
history_entry_details = REVISION_ID;

fingerprint = a nonempty utf8 sequence with meaning defined by minikind.
Given this definition, the following is useful to know:
entry (aka row) - all the data for a given key.
entry[0]: The key (dirname, basename, fileid)
entry[1]: The tree(s) data for this path and id combination.
entry[1][0]: The current tree
entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate:
entry[1][0][0]: minikind
entry[1][0][1]: fingerprint
entry[1][0][2]: size
entry[1][0][3]: executable
entry[1][0][4]: packed_stat
entry[1][1][4]: revision_id
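
As a purely illustrative example (all values made up), an entry for a
versioned file 'dir/name' with one parent tree might therefore look like:

    (('dir', 'name', 'name-file-id'),                                 # the key
     [('f', '<sha1 of canonical form>', 30, False, '<packed stat>'),  # tree 0
      ('f', '<sha1 in parent tree>', 30, False, '<parent revision id>')])  # tree 1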
There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now:
self._dirblocks[0] -> ('', [entry ...]),
and the entries in there are
entries[0][2]: file_id
entries[1][0]: The tree data for the current tree for this fileid at /
'r' is a relocated entry: This path is not present in this tree with this id,
but the id can be found at another location. The fingerprint is used to
point to the target location.
'a' is an absent entry: In that tree the id is not present at this path.
'd' is a directory entry: This path in this tree is a directory with the
current file id. There is no fingerprint for directories.
'f' is a file entry: As for directory, but it's a file. The fingerprint is the
sha1 value of the file's canonical form, i.e. after any read filters have
been applied to the convenience form stored in the working tree.
'l' is a symlink entry: As for directory, but a symlink. The fingerprint is the
link target.
't' is a reference to a nested subtree; the fingerprint is the referenced
revision id.

The entries on disk and in memory are ordered according to the following keys:

    directory, as a list of components
    filename
    file-id

(Splitting the directory into components means, for example, that entries
under 'a/b' sort before entries under 'a-b', even though a plain string
comparison of the two paths would order them the other way around.)
--- Format 1 had the following different definition: ---
rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
    WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
    basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.

In any tree, a kind of 'moved' indicates that the fingerprint field
(which we treat as opaque data specific to the 'kind' anyway) has the
details for the id of this row in that tree.
I'm strongly tempted to add an id->path index as well, but I think that
where we need an id->path mapping we also usually read the whole file, so
I'm going to skip that for the moment, as we have the ability to locate
via bisect any path in any tree, and if we lookup things by path, we can
accumulate an id->path mapping as we go, which will tend to match what we
looked for.

I plan to implement this asap, so please speak up now to alter/tweak the
design - and once we stabilise on this, I'll update the wiki page for
it.

The rationale for all this is that we want fast operations for the
common case (diff/status/commit/merge on all files) and extremely fast
operations for the less common but still frequent case (status/diff/commit
on specific files). Operations on specific files involve a scan for all
the children of a path, *in every involved tree*, which the current
format did not accommodate.
1) Fast end to end use for bzr's top 5 use cases. (commit/diff/status/merge/???)
2) Fall back to the current object model as needed.
3) Scale usably to the largest trees known today - say 50K entries. (mozilla
   is an example of this)

Eventually reuse dirstate objects across locks IFF the dirstate file has not
been modified, but will require that we flush/ignore cached stat-hit data
because we won't want to restat all files on disk just because a lock was
acquired, yet we cannot trust the data after the previous lock was released.
Memory representation:

vector of all directories, and vector of the children?
    root_entry = (direntry for root, [parent_direntries_for_root]),
    ('', ['data for achild', 'data for bchild', 'data for cchild'])
    ('dir', ['achild', 'cchild', 'echild'])
    - single bisect to find N subtrees from a path spec
    - in-order for serialisation - this is 'dirblock' grouping.
    - insertion of a file '/a' affects only the '/' child-vector, that is, to
      insert 10K elements from scratch does not generate O(N^2) memmoves of a
      single vector; rather each individual child-vector is affected, which
      tends to be limited to a manageable number. Will scale badly on trees
      with 10K entries in a single directory. Compare with
      Inventory.InventoryDirectory which has a dictionary for the children.
      No bisect capability, can only probe for exact matches, or grab all
      elements and sort.
    - What's the risk of error here? Once we have the base format being
      processed we should have a net win regardless of optimality. So we are
      going to go with what seems reasonable.

Maybe we should do a test profile of the core structure - 10K simulated
searches/lookups/etc?
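
As a purely illustrative sketch of the dirblock idea (names made up; the real
attribute is DirState._dirblocks, built by _entries_to_current_state below), a
tree containing a file 'a' and a directory 'dir' holding a file 'dir/b' would
be held in memory roughly as:

    self._dirblocks = [
        ('', [<entry for the root directory>]),       # the root row block
        ('', [<entry for 'a'>, <entry for 'dir'>]),   # contents-of-root block
        ('dir', [<entry for 'dir/b'>]),               # contents of 'dir'
        ]

so the children of any directory can be found with a single bisect over the
block names, and each block's entries are kept sorted by their
(dirname, basename, fileid) key.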
Objects for each row?
The lifetime of Dirstate objects is currently per lock, but see above for
possible extensions. The lifetime of a row from a dirstate is expected to be
very short in the optimistic case: which we are optimising for. For instance,
subtree status will determine from analysis of the disk data what rows need to
be examined at all, and will be able to determine from a single row whether
that file has altered or not, so we are aiming to process tens of thousands of
entries each second within the dirstate context, before exposing anything to
the larger codebase. This suggests we want the time for a single file
comparison to be < 0.1 milliseconds. That would give us 10000 paths per second
processed, and to scale to 100 thousand we'll need another order of magnitude
to do that. Now, as the lifetime for all unchanged entries is the time to
parse, stat the file on disk, and then immediately discard, the overhead of
object creation becomes a significant cost.

Figures: Creating a tuple from 3 elements was profiled at 0.0625
microseconds, whereas creating an object which is subclassed from tuple was
0.500 microseconds, and creating an object with 3 elements and slots was 3
microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get
down to 10 microseconds for the total processing - having 33% of that be object
creation is a huge overhead. There is a potential cost in using tuples within
each row which is that the conditional code to do comparisons may be slower
than method invocation, but method invocation is known to be slow due to stack
frame creation, so avoiding methods in these tight inner loops is unfortunately
desirable. We can consider a pyrex version of this with objects in future if
needed.
"""
from stat import S_IEXEC
226
# This is the Windows equivalent of ENOTDIR
227
# It is defined in pywin32.winerror, but we don't want a strong dependency for
228
# just an error code.
229
ERROR_PATH_NOT_FOUND = 3
230
ERROR_DIRECTORY = 267
233
if not getattr(struct, '_compile', None):
234
# Cannot pre-compile the dirstate pack_stat
235
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
236
"""Convert stat values into a packed representation."""
237
        return _encode(_pack('>LLLLLL', st.st_size, int(st.st_mtime),
            int(st.st_ctime), st.st_dev, st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
else:
    # compile the struct compiler we need, so as to only do it once
242
from _struct import Struct
243
_compiled_pack = Struct('>LLLLLL').pack
244
def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
245
"""Convert stat values into a packed representation."""
246
# jam 20060614 it isn't really worth removing more entries if we
247
# are going to leave it in packed form.
248
# With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
249
# With all entries, filesize is 5.9M and read time is maybe 280ms
250
# well within the noise margin
252
# base64 encoding always adds a final newline, so strip it off
253
# The current version
254
return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),
255
st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
256
# This is 0.060s / 1.520s faster by not encoding as much information
257
# return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
258
# This is not strictly faster than _encode(_pack())[:-1]
259
# return '%X.%X.%X.%X.%X.%X' % (
260
# st.st_size, int(st.st_mtime), int(st.st_ctime),
261
# st.st_dev, st.st_ino, st.st_mode)
262
# Similar to the _encode(_pack('>LL'))
263
# return '%X.%X' % (int(st.st_mtime), st.st_mode)
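
# A minimal usage sketch (illustrative only; the path is hypothetical):
# pack_stat takes an os.lstat()/os.fstat() result and returns a short base64
# fragment with the trailing newline stripped, suitable for the state file.
#
#   st = os.lstat('some-file')
#   packed = pack_stat(st)
#   assert '\n' not in packed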
266
class SHA1Provider(object):
267
"""An interface for getting sha1s of a file."""
269
def sha1(self, abspath):
270
"""Return the sha1 of a file given its absolute path."""
271
raise NotImplementedError(self.sha1)
273
def stat_and_sha1(self, abspath):
274
"""Return the stat and sha1 of a file given its absolute path.
276
Note: the stat should be the stat of the physical file
277
while the sha may be the sha of its canonical content.
        """
        raise NotImplementedError(self.stat_and_sha1)
282
class DefaultSHA1Provider(SHA1Provider):
283
"""A SHA1Provider that reads directly from the filesystem."""
285
def sha1(self, abspath):
286
"""Return the sha1 of a file given its absolute path."""
287
return osutils.sha_file_by_name(abspath)
289
def stat_and_sha1(self, abspath):
290
"""Return the stat and sha1 of a file given its absolute path."""
291
file_obj = file(abspath, 'rb')
        try:
            statvalue = os.fstat(file_obj.fileno())
            sha1 = osutils.sha_file(file_obj)
        finally:
            file_obj.close()
        return statvalue, sha1
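
# Illustrative use of the provider interface (the path is hypothetical):
#
#   provider = DefaultSHA1Provider()
#   sha1 = provider.sha1('/path/to/file')
#   statvalue, sha1 = provider.stat_and_sha1('/path/to/file')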
300
class DirState(object):
301
"""Record directory and metadata state for fast access.
303
A dirstate is a specialised data structure for managing local working
304
    tree state information. It's not yet well defined whether it is platform
    specific, and if it is, how we detect/parameterize that.
307
Dirstates use the usual lock_write, lock_read and unlock mechanisms.
308
Unlike most bzr disk formats, DirStates must be locked for reading, using
309
lock_read. (This is an os file lock internally.) This is necessary
310
because the file can be rewritten in place.
312
DirStates must be explicitly written with save() to commit changes; just
313
unlocking them does not write the changes to disk.
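
    A typical write pattern, sketched here purely for illustration (the exact
    construction varies - see from_tree() and initialize()), is:

        state = DirState.initialize('dirstate')  # returns a write-locked state
        try:
            # ... call add()/update_minimal()/set_parent_trees() etc ...
            state.save()
        finally:
            state.unlock()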
    """
    _kind_to_minikind = {
            'absent': 'a',
            'file': 'f',
            'directory': 'd',
            'relocated': 'r',
            'symlink': 'l',
            'tree-reference': 't',
        }

    _minikind_to_kind = {
            'a': 'absent',
            'f': 'file',
            'd': 'directory',
            'r': 'relocated',
            'l': 'symlink',
            't': 'tree-reference',
        }
_stat_to_minikind = {
337
_to_yesno = {True:'y', False: 'n'} # TODO profile the performance gain
338
# of using int conversion rather than a dict here. AND BLAME ANDREW IF
341
# TODO: jam 20070221 Figure out what to do if we have a record that exceeds
342
# the BISECT_PAGE_SIZE. For now, we just have to make it large enough
343
# that we are sure a single record will always fit.
344
BISECT_PAGE_SIZE = 4096
    NOT_IN_MEMORY = 0
    IN_MEMORY_UNMODIFIED = 1
348
IN_MEMORY_MODIFIED = 2
350
# A pack_stat (the x's) that is just noise and will never match the output
    # of base64 encode.
    NULLSTAT = 'x' * 32
    NULL_PARENT_DETAILS = ('a', '', 0, False, '')
355
HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
356
HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
358
def __init__(self, path, sha1_provider):
359
"""Create a DirState object.
361
:param path: The path at which the dirstate file on disk should live.
362
:param sha1_provider: an object meeting the SHA1Provider interface.
        """
# _header_state and _dirblock_state represent the current state
365
        # of the dirstate metadata and the per-row data respectively.
366
# NOT_IN_MEMORY indicates that no data is in memory
367
# IN_MEMORY_UNMODIFIED indicates that what we have in memory
368
# is the same as is on disk
369
# IN_MEMORY_MODIFIED indicates that we have a modified version
370
# of what is on disk.
371
# In future we will add more granularity, for instance _dirblock_state
372
# will probably support partially-in-memory as a separate variable,
373
# allowing for partially-in-memory unmodified and partially-in-memory
375
self._header_state = DirState.NOT_IN_MEMORY
376
self._dirblock_state = DirState.NOT_IN_MEMORY
377
# If true, an error has been detected while updating the dirstate, and
378
# for safety we're not going to commit to disk.
379
self._changes_aborted = False
383
self._state_file = None
384
self._filename = path
385
self._lock_token = None
386
self._lock_state = None
387
self._id_index = None
388
# a map from packed_stat to sha's.
389
self._packed_stat_index = None
390
self._end_of_header = None
391
self._cutoff_time = None
392
self._split_path_cache = {}
393
self._bisect_page_size = DirState.BISECT_PAGE_SIZE
394
self._sha1_provider = sha1_provider
395
if 'hashcache' in debug.debug_flags:
396
self._sha1_file = self._sha1_file_and_mutter
398
self._sha1_file = self._sha1_provider.sha1
399
# These two attributes provide a simple cache for lookups into the
400
# dirstate in-memory vectors. By probing respectively for the last
401
# block, and for the next entry, we save nearly 2 bisections per path
403
self._last_block_index = None
404
self._last_entry_index = None
    def __repr__(self):
        return "%s(%r)" % \
            (self.__class__.__name__, self._filename)
410
def add(self, path, file_id, kind, stat, fingerprint):
411
"""Add a path to be tracked.
413
:param path: The path within the dirstate - '' is the root, 'foo' is the
414
path foo within the root, 'foo/bar' is the path bar within foo
416
:param file_id: The file id of the path being added.
417
        :param kind: The kind of the path, as a string like 'file',
            'directory', etc.
419
:param stat: The output of os.lstat for the path.
420
:param fingerprint: The sha value of the file's canonical form (i.e.
421
after any read filters have been applied),
422
or the target of a symlink,
423
or the referenced revision id for tree-references,
424
or '' for directories.
        """
        # find the block it's in.
        # find the location in the block.
        # check it's not there
431
#------- copied from inventory.ensure_normalized_name - keep synced.
432
# --- normalized_filename wants a unicode basename only, so get one.
433
dirname, basename = osutils.split(path)
434
        # we don't import normalized_filename directly because we want to be
435
# able to change the implementation at runtime for tests.
436
norm_name, can_access = osutils.normalized_filename(basename)
437
if norm_name != basename:
441
raise errors.InvalidNormalization(path)
442
# you should never have files called . or ..; just add the directory
443
# in the parent, or according to the special treatment for the root
444
if basename == '.' or basename == '..':
445
raise errors.InvalidEntryName(path)
446
# now that we've normalised, we need the correct utf8 path and
447
# dirname and basename elements. This single encode and split should be
448
# faster than three separate encodes.
449
utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
450
dirname, basename = osutils.split(utf8path)
451
# uses __class__ for speed; the check is needed for safety
452
if file_id.__class__ is not str:
453
raise AssertionError(
454
"must be a utf8 file_id not %s" % (type(file_id), ))
455
# Make sure the file_id does not exist in this tree
457
file_id_entry = self._get_entry(0, fileid_utf8=file_id, include_deleted=True)
458
if file_id_entry != (None, None):
459
if file_id_entry[1][0][0] == 'a':
460
if file_id_entry[0] != (dirname, basename, file_id):
461
# set the old name's current operation to rename
462
self.update_minimal(file_id_entry[0],
468
rename_from = file_id_entry[0][0:2]
470
path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])
471
kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]
472
info = '%s:%s' % (kind, path)
473
raise errors.DuplicateFileId(file_id, info)
474
first_key = (dirname, basename, '')
475
block_index, present = self._find_block_index_from_key(first_key)
477
# check the path is not in the tree
478
block = self._dirblocks[block_index][1]
479
entry_index, _ = self._find_entry_index(first_key, block)
480
while (entry_index < len(block) and
481
block[entry_index][0][0:2] == first_key[0:2]):
482
if block[entry_index][1][0][0] not in 'ar':
483
# this path is in the dirstate in the current tree.
484
raise Exception, "adding already added path!"
487
# The block where we want to put the file is not present. But it
488
# might be because the directory was empty, or not loaded yet. Look
489
# for a parent entry, if not found, raise NotVersionedError
490
parent_dir, parent_base = osutils.split(dirname)
491
parent_block_idx, parent_entry_idx, _, parent_present = \
492
self._get_block_entry_index(parent_dir, parent_base, 0)
493
if not parent_present:
494
raise errors.NotVersionedError(path, str(self))
495
self._ensure_block(parent_block_idx, parent_entry_idx, dirname)
496
block = self._dirblocks[block_index][1]
497
entry_key = (dirname, basename, file_id)
        if stat is None:
            size = 0
            packed_stat = DirState.NULLSTAT
        else:
            size = stat.st_size
            packed_stat = pack_stat(stat)
504
parent_info = self._empty_parent_info()
505
minikind = DirState._kind_to_minikind[kind]
506
if rename_from is not None:
            if rename_from[0]:
                old_path_utf8 = '%s/%s' % rename_from
            else:
                old_path_utf8 = rename_from[1]
            parent_info[0] = ('r', old_path_utf8, 0, False, '')
        if kind == 'file':
            entry_data = entry_key, [
                (minikind, fingerprint, size, False, packed_stat),
                ] + parent_info
        elif kind == 'directory':
            entry_data = entry_key, [
                (minikind, '', 0, False, packed_stat),
                ] + parent_info
        elif kind == 'symlink':
            entry_data = entry_key, [
                (minikind, fingerprint, size, False, packed_stat),
                ] + parent_info
        elif kind == 'tree-reference':
            entry_data = entry_key, [
                (minikind, fingerprint, 0, False, packed_stat),
                ] + parent_info
        else:
            raise errors.BzrError('unknown kind %r' % kind)
530
entry_index, present = self._find_entry_index(entry_key, block)
532
block.insert(entry_index, entry_data)
534
if block[entry_index][1][0][0] != 'a':
535
raise AssertionError(" %r(%r) already added" % (basename, file_id))
536
block[entry_index][1][0] = entry_data[1][0]
538
if kind == 'directory':
539
# insert a new dirblock
540
self._ensure_block(block_index, entry_index, utf8path)
541
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
543
self._id_index.setdefault(entry_key[2], set()).add(entry_key)
545
def _bisect(self, paths):
546
"""Bisect through the disk structure for specific rows.
548
:param paths: A list of paths to find
549
:return: A dict mapping path => entries for found entries. Missing
550
entries will not be in the map.
551
The list is not sorted, and entries will be populated
552
based on when they were read.
        """
self._requires_lock()
555
# We need the file pointer to be right after the initial header block
556
self._read_header_if_needed()
557
# If _dirblock_state was in memory, we should just return info from
558
# there, this function is only meant to handle when we want to read
560
if self._dirblock_state != DirState.NOT_IN_MEMORY:
561
raise AssertionError("bad dirblock state %r" % self._dirblock_state)
563
# The disk representation is generally info + '\0\n\0' at the end. But
564
# for bisecting, it is easier to treat this as '\0' + info + '\0\n'
565
# Because it means we can sync on the '\n'
566
state_file = self._state_file
567
file_size = os.fstat(state_file.fileno()).st_size
568
# We end up with 2 extra fields, we should have a trailing '\n' to
569
        # ensure that we read the whole record, and we should have a precursor
570
# '' which ensures that we start after the previous '\n'
571
entry_field_count = self._fields_per_entry() + 1
573
low = self._end_of_header
574
high = file_size - 1 # Ignore the final '\0'
575
# Map from (dir, name) => entry
578
# Avoid infinite seeking
579
max_count = 30*len(paths)
581
# pending is a list of places to look.
582
# each entry is a tuple of low, high, dir_names
583
# low -> the first byte offset to read (inclusive)
584
# high -> the last byte offset (inclusive)
585
# dir_names -> The list of (dir, name) pairs that should be found in
586
# the [low, high] range
587
pending = [(low, high, paths)]
589
page_size = self._bisect_page_size
591
fields_to_entry = self._get_fields_to_entry()
594
low, high, cur_files = pending.pop()
596
if not cur_files or low >= high:
601
if count > max_count:
602
raise errors.BzrError('Too many seeks, most likely a bug.')
604
mid = max(low, (low+high-page_size)/2)
607
# limit the read size, so we don't end up reading data that we have
609
read_size = min(page_size, (high-mid)+1)
610
block = state_file.read(read_size)
613
entries = block.split('\n')
616
# We didn't find a '\n', so we cannot have found any records.
617
# So put this range back and try again. But we know we have to
618
# increase the page size, because a single read did not contain
619
# a record break (so records must be larger than page_size)
621
pending.append((low, high, cur_files))
624
# Check the first and last entries, in case they are partial, or if
625
# we don't care about the rest of this page
627
first_fields = entries[0].split('\0')
628
if len(first_fields) < entry_field_count:
629
# We didn't get the complete first entry
630
# so move start, and grab the next, which
631
# should be a full entry
632
start += len(entries[0])+1
633
first_fields = entries[1].split('\0')
636
if len(first_fields) <= 2:
637
# We didn't even get a filename here... what do we do?
638
# Try a large page size and repeat this query
640
pending.append((low, high, cur_files))
643
# Find what entries we are looking for, which occur before and
644
# after this first record.
647
first_path = first_fields[1] + '/' + first_fields[2]
649
first_path = first_fields[2]
650
first_loc = _bisect_path_left(cur_files, first_path)
652
# These exist before the current location
653
pre = cur_files[:first_loc]
654
# These occur after the current location, which may be in the
655
# data we read, or might be after the last entry
656
post = cur_files[first_loc:]
658
if post and len(first_fields) >= entry_field_count:
659
# We have files after the first entry
661
# Parse the last entry
662
last_entry_num = len(entries)-1
663
last_fields = entries[last_entry_num].split('\0')
664
if len(last_fields) < entry_field_count:
665
# The very last hunk was not complete,
666
# read the previous hunk
667
after = mid + len(block) - len(entries[-1])
669
last_fields = entries[last_entry_num].split('\0')
671
after = mid + len(block)
674
last_path = last_fields[1] + '/' + last_fields[2]
676
last_path = last_fields[2]
677
last_loc = _bisect_path_right(post, last_path)
679
middle_files = post[:last_loc]
680
post = post[last_loc:]
683
# We have files that should occur in this block
684
# (>= first, <= last)
685
# Either we will find them here, or we can mark them as
688
if middle_files[0] == first_path:
689
# We might need to go before this location
690
pre.append(first_path)
691
if middle_files[-1] == last_path:
692
post.insert(0, last_path)
694
# Find out what paths we have
695
paths = {first_path:[first_fields]}
696
# last_path might == first_path so we need to be
697
# careful if we should append rather than overwrite
698
if last_entry_num != first_entry_num:
699
paths.setdefault(last_path, []).append(last_fields)
700
for num in xrange(first_entry_num+1, last_entry_num):
701
# TODO: jam 20070223 We are already splitting here, so
702
# shouldn't we just split the whole thing rather
703
# than doing the split again in add_one_record?
704
fields = entries[num].split('\0')
706
path = fields[1] + '/' + fields[2]
709
paths.setdefault(path, []).append(fields)
711
for path in middle_files:
712
for fields in paths.get(path, []):
713
# offset by 1 because of the opening '\0'
714
# consider changing fields_to_entry to avoid the
716
entry = fields_to_entry(fields[1:])
717
found.setdefault(path, []).append(entry)
719
# Now we have split up everything into pre, middle, and post, and
720
# we have handled everything that fell in 'middle'.
721
# We add 'post' first, so that we prefer to seek towards the
722
# beginning, so that we will tend to go as early as we need, and
723
# then only seek forward after that.
725
pending.append((after, high, post))
727
pending.append((low, start-1, pre))
729
# Consider that we may want to return the directory entries in sorted
730
# order. For now, we just return them in whatever order we found them,
731
# and leave it up to the caller if they care if it is ordered or not.
734
def _bisect_dirblocks(self, dir_list):
735
"""Bisect through the disk structure to find entries in given dirs.
737
_bisect_dirblocks is meant to find the contents of directories, which
738
differs from _bisect, which only finds individual entries.
740
:param dir_list: A sorted list of directory names ['', 'dir', 'foo'].
741
:return: A map from dir => entries_for_dir
        """
# TODO: jam 20070223 A lot of the bisecting logic could be shared
744
# between this and _bisect. It would require parameterizing the
745
# inner loop with a function, though. We should evaluate the
746
# performance difference.
747
self._requires_lock()
748
# We need the file pointer to be right after the initial header block
749
self._read_header_if_needed()
750
# If _dirblock_state was in memory, we should just return info from
751
# there, this function is only meant to handle when we want to read
753
if self._dirblock_state != DirState.NOT_IN_MEMORY:
754
raise AssertionError("bad dirblock state %r" % self._dirblock_state)
755
# The disk representation is generally info + '\0\n\0' at the end. But
756
# for bisecting, it is easier to treat this as '\0' + info + '\0\n'
757
# Because it means we can sync on the '\n'
758
state_file = self._state_file
759
file_size = os.fstat(state_file.fileno()).st_size
760
# We end up with 2 extra fields, we should have a trailing '\n' to
761
        # ensure that we read the whole record, and we should have a precursor
762
# '' which ensures that we start after the previous '\n'
763
entry_field_count = self._fields_per_entry() + 1
765
low = self._end_of_header
766
high = file_size - 1 # Ignore the final '\0'
767
# Map from dir => entry
770
# Avoid infinite seeking
771
max_count = 30*len(dir_list)
773
# pending is a list of places to look.
774
# each entry is a tuple of low, high, dir_names
775
# low -> the first byte offset to read (inclusive)
776
# high -> the last byte offset (inclusive)
777
# dirs -> The list of directories that should be found in
778
# the [low, high] range
779
pending = [(low, high, dir_list)]
781
page_size = self._bisect_page_size
783
fields_to_entry = self._get_fields_to_entry()
786
low, high, cur_dirs = pending.pop()
788
if not cur_dirs or low >= high:
793
if count > max_count:
794
raise errors.BzrError('Too many seeks, most likely a bug.')
796
mid = max(low, (low+high-page_size)/2)
799
# limit the read size, so we don't end up reading data that we have
801
read_size = min(page_size, (high-mid)+1)
802
block = state_file.read(read_size)
805
entries = block.split('\n')
808
# We didn't find a '\n', so we cannot have found any records.
809
# So put this range back and try again. But we know we have to
810
# increase the page size, because a single read did not contain
811
# a record break (so records must be larger than page_size)
813
pending.append((low, high, cur_dirs))
816
# Check the first and last entries, in case they are partial, or if
817
# we don't care about the rest of this page
819
first_fields = entries[0].split('\0')
820
if len(first_fields) < entry_field_count:
821
# We didn't get the complete first entry
822
# so move start, and grab the next, which
823
# should be a full entry
824
start += len(entries[0])+1
825
first_fields = entries[1].split('\0')
828
if len(first_fields) <= 1:
829
# We didn't even get a dirname here... what do we do?
830
# Try a large page size and repeat this query
832
pending.append((low, high, cur_dirs))
835
# Find what entries we are looking for, which occur before and
836
# after this first record.
838
first_dir = first_fields[1]
839
first_loc = bisect.bisect_left(cur_dirs, first_dir)
841
# These exist before the current location
842
pre = cur_dirs[:first_loc]
843
# These occur after the current location, which may be in the
844
# data we read, or might be after the last entry
845
post = cur_dirs[first_loc:]
847
if post and len(first_fields) >= entry_field_count:
848
# We have records to look at after the first entry
850
# Parse the last entry
851
last_entry_num = len(entries)-1
852
last_fields = entries[last_entry_num].split('\0')
853
if len(last_fields) < entry_field_count:
854
# The very last hunk was not complete,
855
# read the previous hunk
856
after = mid + len(block) - len(entries[-1])
858
last_fields = entries[last_entry_num].split('\0')
860
after = mid + len(block)
862
last_dir = last_fields[1]
863
last_loc = bisect.bisect_right(post, last_dir)
865
middle_files = post[:last_loc]
866
post = post[last_loc:]
869
# We have files that should occur in this block
870
# (>= first, <= last)
871
# Either we will find them here, or we can mark them as
874
if middle_files[0] == first_dir:
875
# We might need to go before this location
876
pre.append(first_dir)
877
if middle_files[-1] == last_dir:
878
post.insert(0, last_dir)
880
# Find out what paths we have
881
paths = {first_dir:[first_fields]}
882
# last_dir might == first_dir so we need to be
883
# careful if we should append rather than overwrite
884
if last_entry_num != first_entry_num:
885
paths.setdefault(last_dir, []).append(last_fields)
886
for num in xrange(first_entry_num+1, last_entry_num):
887
# TODO: jam 20070223 We are already splitting here, so
888
# shouldn't we just split the whole thing rather
889
# than doing the split again in add_one_record?
890
fields = entries[num].split('\0')
891
paths.setdefault(fields[1], []).append(fields)
893
for cur_dir in middle_files:
894
for fields in paths.get(cur_dir, []):
895
# offset by 1 because of the opening '\0'
896
# consider changing fields_to_entry to avoid the
898
entry = fields_to_entry(fields[1:])
899
found.setdefault(cur_dir, []).append(entry)
901
# Now we have split up everything into pre, middle, and post, and
902
# we have handled everything that fell in 'middle'.
903
# We add 'post' first, so that we prefer to seek towards the
904
# beginning, so that we will tend to go as early as we need, and
905
# then only seek forward after that.
907
pending.append((after, high, post))
909
pending.append((low, start-1, pre))
913
def _bisect_recursive(self, paths):
914
"""Bisect for entries for all paths and their children.
916
This will use bisect to find all records for the supplied paths. It
917
will then continue to bisect for any records which are marked as
918
directories. (and renames?)
920
:param paths: A sorted list of (dir, name) pairs
921
eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]
922
:return: A dictionary mapping (dir, name, file_id) => [tree_info]
        """
# Map from (dir, name, file_id) => [tree_info]
927
found_dir_names = set()
929
# Directories that have been read
930
processed_dirs = set()
931
# Get the ball rolling with the first bisect for all entries.
932
newly_found = self._bisect(paths)
935
# Directories that need to be read
937
paths_to_search = set()
938
for entry_list in newly_found.itervalues():
939
for dir_name_id, trees_info in entry_list:
940
found[dir_name_id] = trees_info
941
found_dir_names.add(dir_name_id[:2])
943
for tree_info in trees_info:
944
minikind = tree_info[0]
947
# We already processed this one as a directory,
948
# we don't need to do the extra work again.
950
subdir, name, file_id = dir_name_id
951
path = osutils.pathjoin(subdir, name)
953
if path not in processed_dirs:
954
pending_dirs.add(path)
955
elif minikind == 'r':
956
# Rename, we need to directly search the target
957
# which is contained in the fingerprint column
958
dir_name = osutils.split(tree_info[1])
959
if dir_name[0] in pending_dirs:
960
# This entry will be found in the dir search
962
if dir_name not in found_dir_names:
963
paths_to_search.add(tree_info[1])
964
# Now we have a list of paths to look for directly, and
965
# directory blocks that need to be read.
966
# newly_found is mixing the keys between (dir, name) and path
967
# entries, but that is okay, because we only really care about the
969
newly_found = self._bisect(sorted(paths_to_search))
970
newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))
971
processed_dirs.update(pending_dirs)
974
def _discard_merge_parents(self):
975
"""Discard any parents trees beyond the first.
977
Note that if this fails the dirstate is corrupted.
979
After this function returns the dirstate contains 2 trees, neither of
982
self._read_header_if_needed()
983
parents = self.get_parent_ids()
986
# only require all dirblocks if we are doing a full-pass removal.
987
self._read_dirblocks_if_needed()
988
dead_patterns = set([('a', 'r'), ('a', 'a'), ('r', 'r'), ('r', 'a')])
989
def iter_entries_removable():
990
for block in self._dirblocks:
991
deleted_positions = []
992
for pos, entry in enumerate(block[1]):
994
if (entry[1][0][0], entry[1][1][0]) in dead_patterns:
995
deleted_positions.append(pos)
996
if deleted_positions:
997
if len(deleted_positions) == len(block[1]):
1000
for pos in reversed(deleted_positions):
1002
# if the first parent is a ghost:
1003
if parents[0] in self.get_ghosts():
1004
empty_parent = [DirState.NULL_PARENT_DETAILS]
1005
for entry in iter_entries_removable():
1006
entry[1][1:] = empty_parent
1008
for entry in iter_entries_removable():
1012
self._parents = [parents[0]]
1013
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1014
self._header_state = DirState.IN_MEMORY_MODIFIED
1016
def _empty_parent_info(self):
1017
return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
1020
def _ensure_block(self, parent_block_index, parent_row_index, dirname):
1021
"""Ensure a block for dirname exists.
1023
This function exists to let callers which know that there is a
1024
directory dirname ensure that the block for it exists. This block can
1025
fail to exist because of demand loading, or because a directory had no
1026
children. In either case it is not an error. It is however an error to
1027
call this if there is no parent entry for the directory, and thus the
1028
function requires the coordinates of such an entry to be provided.
1030
        The root row is special cased and can be indicated with a parent block
        and row index of -1.
1033
:param parent_block_index: The index of the block in which dirname's row
1035
:param parent_row_index: The index in the parent block where the row
1037
:param dirname: The utf8 dirname to ensure there is a block for.
1038
:return: The index for the block.
        """
if dirname == '' and parent_row_index == 0 and parent_block_index == 0:
1041
# This is the signature of the root row, and the
1042
# contents-of-root row is always index 1
1044
# the basename of the directory must be the end of its full name.
1045
if not (parent_block_index == -1 and
1046
parent_block_index == -1 and dirname == ''):
1047
if not dirname.endswith(
1048
self._dirblocks[parent_block_index][1][parent_row_index][0][1]):
1049
raise AssertionError("bad dirname %r" % dirname)
1050
block_index, present = self._find_block_index_from_key((dirname, '', ''))
1052
## In future, when doing partial parsing, this should load and
1053
# populate the entire block.
1054
self._dirblocks.insert(block_index, (dirname, []))
1057
def _entries_to_current_state(self, new_entries):
1058
"""Load new_entries into self.dirblocks.
1060
Process new_entries into the current state object, making them the active
1061
state. The entries are grouped together by directory to form dirblocks.
1063
:param new_entries: A sorted list of entries. This function does not sort
1064
to prevent unneeded overhead when callers have a sorted list already.
        """
if new_entries[0][0][0:2] != ('', ''):
1068
raise AssertionError(
1069
"Missing root row %r" % (new_entries[0][0],))
1070
# The two blocks here are deliberate: the root block and the
1071
# contents-of-root block.
1072
self._dirblocks = [('', []), ('', [])]
1073
current_block = self._dirblocks[0][1]
1074
current_dirname = ''
1076
append_entry = current_block.append
1077
for entry in new_entries:
1078
if entry[0][0] != current_dirname:
1079
# new block - different dirname
1081
current_dirname = entry[0][0]
1082
self._dirblocks.append((current_dirname, current_block))
1083
append_entry = current_block.append
1084
# append the entry to the current block
1086
self._split_root_dirblock_into_contents()
1088
def _split_root_dirblock_into_contents(self):
1089
"""Split the root dirblocks into root and contents-of-root.
1091
After parsing by path, we end up with root entries and contents-of-root
1092
entries in the same block. This loop splits them out again.
        """
# The above loop leaves the "root block" entries mixed with the
1095
# "contents-of-root block". But we don't want an if check on
1096
# all entries, so instead we just fix it up here.
1097
if self._dirblocks[1] != ('', []):
1098
raise ValueError("bad dirblock start %r" % (self._dirblocks[1],))
1100
contents_of_root_block = []
1101
for entry in self._dirblocks[0][1]:
1102
if not entry[0][1]: # This is a root entry
1103
root_block.append(entry)
1105
contents_of_root_block.append(entry)
1106
self._dirblocks[0] = ('', root_block)
1107
self._dirblocks[1] = ('', contents_of_root_block)
1109
def _entries_for_path(self, path):
1110
"""Return a list with all the entries that match path for all ids."""
1111
dirname, basename = os.path.split(path)
1112
key = (dirname, basename, '')
1113
block_index, present = self._find_block_index_from_key(key)
1115
# the block which should contain path is absent.
1118
block = self._dirblocks[block_index][1]
1119
entry_index, _ = self._find_entry_index(key, block)
1120
# we may need to look at multiple entries at this path: walk while the specific_files match.
1121
while (entry_index < len(block) and
1122
block[entry_index][0][0:2] == key[0:2]):
1123
result.append(block[entry_index])
1127
def _entry_to_line(self, entry):
1128
"""Serialize entry to a NULL delimited line ready for _get_output_lines.
1130
:param entry: An entry_tuple as defined in the module docstring.
        """
entire_entry = list(entry[0])
1133
for tree_number, tree_data in enumerate(entry[1]):
1134
# (minikind, fingerprint, size, executable, tree_specific_string)
1135
entire_entry.extend(tree_data)
1136
# 3 for the key, 5 for the fields per tree.
1137
tree_offset = 3 + tree_number * 5
1139
entire_entry[tree_offset + 0] = tree_data[0]
1141
entire_entry[tree_offset + 2] = str(tree_data[2])
1143
entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]
1144
return '\0'.join(entire_entry)
1146
def _fields_per_entry(self):
1147
"""How many null separated fields should be in each entry row.
1149
Each line now has an extra '\n' field which is not used
1150
so we just skip over it
1152
3 fields for the key
1153
+ number of fields per tree_data (5) * tree count
        + 1 for the trailing newline field

        For example, with a single parent tree this is 3 + 5 * 2 + 1 = 14.
        """
tree_count = 1 + self._num_present_parents()
1157
return 3 + 5 * tree_count + 1
1159
def _find_block(self, key, add_if_missing=False):
1160
"""Return the block that key should be present in.
1162
:param key: A dirstate entry key.
1163
:return: The block tuple.
        """
block_index, present = self._find_block_index_from_key(key)
1167
if not add_if_missing:
1168
# check to see if key is versioned itself - we might want to
1169
                # add it anyway, because dirs with no entries don't get a
1170
# dirblock at parse time.
1171
# This is an uncommon branch to take: most dirs have children,
1172
# and most code works with versioned paths.
1173
parent_base, parent_name = osutils.split(key[0])
1174
if not self._get_block_entry_index(parent_base, parent_name, 0)[3]:
1175
                    # some parent path has not been added - it's an error to add
1177
raise errors.NotVersionedError(key[0:2], str(self))
1178
self._dirblocks.insert(block_index, (key[0], []))
1179
return self._dirblocks[block_index]
1181
def _find_block_index_from_key(self, key):
1182
"""Find the dirblock index for a key.
1184
:return: The block index, True if the block for the key is present.
1186
if key[0:2] == ('', ''):
1189
if (self._last_block_index is not None and
1190
self._dirblocks[self._last_block_index][0] == key[0]):
1191
return self._last_block_index, True
1194
block_index = bisect_dirblock(self._dirblocks, key[0], 1,
1195
cache=self._split_path_cache)
1196
# _right returns one-past-where-key is so we have to subtract
1197
# one to use it. we use _right here because there are two
1198
# '' blocks - the root, and the contents of root
1199
# we always have a minimum of 2 in self._dirblocks: root and
1200
# root-contents, and for '', we get 2 back, so this is
1201
# simple and correct:
1202
present = (block_index < len(self._dirblocks) and
1203
self._dirblocks[block_index][0] == key[0])
1204
self._last_block_index = block_index
1205
# Reset the entry index cache to the beginning of the block.
1206
self._last_entry_index = -1
1207
return block_index, present
1209
def _find_entry_index(self, key, block):
1210
"""Find the entry index for a key in a block.
1212
:return: The entry index, True if the entry for the key is present.
        """
len_block = len(block)
1216
if self._last_entry_index is not None:
1218
entry_index = self._last_entry_index + 1
1219
# A hit is when the key is after the last slot, and before or
1220
# equal to the next slot.
1221
if ((entry_index > 0 and block[entry_index - 1][0] < key) and
1222
key <= block[entry_index][0]):
1223
self._last_entry_index = entry_index
1224
present = (block[entry_index][0] == key)
1225
return entry_index, present
1228
entry_index = bisect.bisect_left(block, (key, []))
1229
present = (entry_index < len_block and
1230
block[entry_index][0] == key)
1231
self._last_entry_index = entry_index
1232
return entry_index, present
1235
def from_tree(tree, dir_state_filename, sha1_provider=None):
1236
"""Create a dirstate from a bzr Tree.
1238
:param tree: The tree which should provide parent information and
1240
:param sha1_provider: an object meeting the SHA1Provider interface.
1241
If None, a DefaultSHA1Provider is used.
1242
:return: a DirState object which is currently locked for writing.
1243
(it was locked by DirState.initialize)
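
        An illustrative use (the tree should normally be read-locked by the
        caller; the returned state is write-locked and must be unlocked):

            state = DirState.from_tree(tree, 'path/to/dirstate')
            try:
                state.save()
            finally:
                state.unlock()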
        """
result = DirState.initialize(dir_state_filename,
1246
sha1_provider=sha1_provider)
1250
parent_ids = tree.get_parent_ids()
1251
num_parents = len(parent_ids)
1253
for parent_id in parent_ids:
1254
parent_tree = tree.branch.repository.revision_tree(parent_id)
1255
parent_trees.append((parent_id, parent_tree))
1256
parent_tree.lock_read()
1257
result.set_parent_trees(parent_trees, [])
1258
result.set_state_from_inventory(tree.inventory)
1260
for revid, parent_tree in parent_trees:
1261
parent_tree.unlock()
1264
# The caller won't have a chance to unlock this, so make sure we
1270
def update_by_delta(self, delta):
1271
"""Apply an inventory delta to the dirstate for tree 0
1273
        :param delta: An inventory delta. See Inventory.apply_delta for
            details.
        """
        self._read_dirblocks_if_needed()
        insertions = {}
        removals = {}
1279
for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
1280
if (file_id in insertions) or (file_id in removals):
1281
raise AssertionError("repeated file id in delta %r" % (file_id,))
1282
if old_path is not None:
1283
old_path = old_path.encode('utf-8')
1284
removals[file_id] = old_path
1285
if new_path is not None:
1286
new_path = new_path.encode('utf-8')
1287
dirname, basename = osutils.split(new_path)
1288
key = (dirname, basename, file_id)
1289
minikind = DirState._kind_to_minikind[inv_entry.kind]
1291
fingerprint = inv_entry.reference_revision
1294
insertions[file_id] = (key, minikind, inv_entry.executable,
1295
fingerprint, new_path)
1296
# Transform moves into delete+add pairs
1297
if None not in (old_path, new_path):
1298
for child in self._iter_child_entries(0, old_path):
1299
if child[0][2] in insertions or child[0][2] in removals:
1301
child_dirname = child[0][0]
1302
child_basename = child[0][1]
1303
minikind = child[1][0][0]
1304
fingerprint = child[1][0][4]
1305
executable = child[1][0][3]
1306
old_child_path = osutils.pathjoin(child[0][0],
1308
removals[child[0][2]] = old_child_path
1309
child_suffix = child_dirname[len(old_path):]
1310
new_child_dirname = (new_path + child_suffix)
1311
key = (new_child_dirname, child_basename, child[0][2])
1312
new_child_path = os.path.join(new_child_dirname,
1314
insertions[child[0][2]] = (key, minikind, executable,
1315
fingerprint, new_child_path)
1316
self._apply_removals(removals.values())
1317
self._apply_insertions(insertions.values())
1319
def _apply_removals(self, removals):
1320
for path in sorted(removals, reverse=True):
1321
dirname, basename = osutils.split(path)
1322
block_i, entry_i, d_present, f_present = \
1323
self._get_block_entry_index(dirname, basename, 0)
1324
entry = self._dirblocks[block_i][1][entry_i]
1325
self._make_absent(entry)
1326
# See if we have a malformed delta: deleting a directory must not
1327
# leave crud behind. This increases the number of bisects needed
1328
# substantially, but deletion or renames of large numbers of paths
1329
# is rare enough it shouldn't be an issue (famous last words?) RBC
1331
block_i, entry_i, d_present, f_present = \
1332
self._get_block_entry_index(path, '', 0)
1334
# The dir block is still present in the dirstate; this could
1335
# be due to it being in a parent tree, or a corrupt delta.
1336
for child_entry in self._dirblocks[block_i][1]:
1337
if child_entry[1][0][0] not in ('r', 'a'):
1338
raise errors.InconsistentDelta(path, entry[0][2],
1339
"The file id was deleted but its children were "
1342
def _apply_insertions(self, adds):
1343
for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1344
self.update_minimal(key, minikind, executable, fingerprint,
1345
path_utf8=path_utf8)
1347
def update_basis_by_delta(self, delta, new_revid):
1348
"""Update the parents of this tree after a commit.
1350
This gives the tree one parent, with revision id new_revid. The
1351
inventory delta is applied to the current basis tree to generate the
1352
inventory for the parent new_revid, and all other parent trees are
1355
Note that an exception during the operation of this method will leave
1356
the dirstate in a corrupt state where it should not be saved.
        Finally, we expect all changes to be synchronising the basis tree with
        the working tree.

        :param new_revid: The new revision id for the trees parent.
1362
:param delta: An inventory delta (see apply_inventory_delta) describing
1363
the changes from the current left most parent revision to new_revid.
        """
self._read_dirblocks_if_needed()
1366
self._discard_merge_parents()
1367
if self._ghosts != []:
1368
raise NotImplementedError(self.update_basis_by_delta)
1369
if len(self._parents) == 0:
1370
# setup a blank tree, the most simple way.
1371
empty_parent = DirState.NULL_PARENT_DETAILS
1372
for entry in self._iter_entries():
1373
entry[1].append(empty_parent)
1374
self._parents.append(new_revid)
1376
self._parents[0] = new_revid
1378
delta = sorted(delta, reverse=True)
        adds = []
        changes = []
        deletes = []
        # The paths this function accepts are unicode and must be encoded as we
        # go.
        encode = cache_utf8.encode
1385
inv_to_entry = self._inv_entry_to_details
1386
        # delta is now (deletes, changes), (adds) in reverse lexicographical
        # order.
        # deletes in reverse lexicographic order are safe to process in situ.
        # renames are not, as a rename from any path could go to a path
        # lexicographically lower, so we transform renames into delete, add pairs,
1391
# expanding them recursively as needed.
1392
# At the same time, to reduce interface friction we convert the input
1393
# inventory entries to dirstate.
1394
root_only = ('', '')
1395
for old_path, new_path, file_id, inv_entry in delta:
1396
if old_path is None:
1397
adds.append((None, encode(new_path), file_id,
1398
inv_to_entry(inv_entry), True))
1399
elif new_path is None:
1400
deletes.append((encode(old_path), None, file_id, None, True))
1401
elif (old_path, new_path) != root_only:
1403
# Because renames must preserve their children we must have
1404
# processed all relocations and removes before hand. The sort
1405
# order ensures we've examined the child paths, but we also
1406
# have to execute the removals, or the split to an add/delete
1407
# pair will result in the deleted item being reinserted, or
1408
# renamed items being reinserted twice - and possibly at the
1409
# wrong place. Splitting into a delete/add pair also simplifies
1410
# the handling of entries with ('f', ...), ('r' ...) because
1411
# the target of the 'r' is old_path here, and we add that to
1412
# deletes, meaning that the add handler does not need to check
1413
# for 'r' items on every pass.
1414
self._update_basis_apply_deletes(deletes)
1416
new_path_utf8 = encode(new_path)
1417
# Split into an add/delete pair recursively.
1418
adds.append((None, new_path_utf8, file_id,
1419
inv_to_entry(inv_entry), False))
1420
# Expunge deletes that we've seen so that deleted/renamed
1421
# children of a rename directory are handled correctly.
1422
new_deletes = reversed(list(self._iter_child_entries(1,
1424
# Remove the current contents of the tree at orig_path, and
1425
# reinsert at the correct new path.
1426
for entry in new_deletes:
1428
source_path = entry[0][0] + '/' + entry[0][1]
1430
source_path = entry[0][1]
1432
target_path = new_path_utf8 + source_path[len(old_path):]
1435
raise AssertionError("cannot rename directory to"
1437
target_path = source_path[len(old_path) + 1:]
1438
adds.append((None, target_path, entry[0][2], entry[1][1], False))
1440
(source_path, target_path, entry[0][2], None, False))
1442
(encode(old_path), new_path, file_id, None, False))
1444
# changes to just the root should not require remove/insertion
1446
changes.append((encode(old_path), encode(new_path), file_id,
1447
inv_to_entry(inv_entry)))
1449
# Finish expunging deletes/first half of renames.
1450
self._update_basis_apply_deletes(deletes)
1451
# Reinstate second half of renames and new paths.
1452
self._update_basis_apply_adds(adds)
1453
# Apply in-situ changes.
1454
self._update_basis_apply_changes(changes)
1456
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1457
self._header_state = DirState.IN_MEMORY_MODIFIED
1458
self._id_index = None
1461
def _update_basis_apply_adds(self, adds):
1462
"""Apply a sequence of adds to tree 1 during update_basis_by_delta.
1464
They may be adds, or renames that have been split into add/delete
1467
:param adds: A sequence of adds. Each add is a tuple:
1468
(None, new_path_utf8, file_id, (entry_details), real_add). real_add
1469
is False when the add is the second half of a remove-and-reinsert
1470
pair created to handle renames and deletes.
        """
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        adds.sort()
        # adds is now in lexicographic order, which places all parents before
1476
# their children, so we can process it linearly.
        absent = 'ar'
for old_path, new_path, file_id, new_details, real_add in adds:
1479
# the entry for this file_id must be in tree 0.
1480
entry = self._get_entry(0, file_id, new_path)
1481
if entry[0] is None or entry[0][2] != file_id:
1482
self._changes_aborted = True
1483
raise errors.InconsistentDelta(new_path, file_id,
1484
'working tree does not contain new entry')
1485
if real_add and entry[1][1][0] not in absent:
1486
self._changes_aborted = True
1487
raise errors.InconsistentDelta(new_path, file_id,
1488
'The entry was considered to be a genuinely new record,'
1489
' but there was already an old record for it.')
1490
# We don't need to update the target of an 'r' because the handling
1491
# of renames turns all 'r' situations into a delete at the original
1493
entry[1][1] = new_details
1495
def _update_basis_apply_changes(self, changes):
1496
"""Apply a sequence of changes to tree 1 during update_basis_by_delta.
1498
        :param changes: A sequence of changes. Each change is a tuple:
            (path_utf8, path_utf8, file_id, (entry_details))
        """
        absent = 'ar'
for old_path, new_path, file_id, new_details in changes:
1503
# the entry for this file_id must be in tree 0.
1504
entry = self._get_entry(0, file_id, new_path)
1505
if entry[0] is None or entry[0][2] != file_id:
1506
self._changes_aborted = True
1507
raise errors.InconsistentDelta(new_path, file_id,
1508
'working tree does not contain new entry')
1509
if (entry[1][0][0] in absent or
1510
entry[1][1][0] in absent):
1511
self._changes_aborted = True
1512
raise errors.InconsistentDelta(new_path, file_id,
1513
'changed considered absent')
1514
entry[1][1] = new_details
1516
def _update_basis_apply_deletes(self, deletes):
1517
"""Apply a sequence of deletes to tree 1 during update_basis_by_delta.
1519
They may be deletes, or renames that have been split into add/delete
1522
:param deletes: A sequence of deletes. Each delete is a tuple:
1523
(old_path_utf8, new_path_utf8, file_id, None, real_delete).
1524
real_delete is True when the desired outcome is an actual deletion
1525
rather than the rename handling logic temporarily deleting a path
1526
during the replacement of a parent.
        """
null = DirState.NULL_PARENT_DETAILS
1529
for old_path, new_path, file_id, _, real_delete in deletes:
1530
if real_delete != (new_path is None):
1531
raise AssertionError("bad delete delta")
1532
# the entry for this file_id must be in tree 1.
1533
dirname, basename = osutils.split(old_path)
1534
block_index, entry_index, dir_present, file_present = \
1535
self._get_block_entry_index(dirname, basename, 1)
1536
if not file_present:
1537
self._changes_aborted = True
1538
raise errors.InconsistentDelta(old_path, file_id,
1539
'basis tree does not contain removed entry')
1540
entry = self._dirblocks[block_index][1][entry_index]
1541
if entry[0][2] != file_id:
1542
self._changes_aborted = True
1543
raise errors.InconsistentDelta(old_path, file_id,
1544
'mismatched file_id in tree 1')
1546
if entry[1][0][0] != 'a':
1547
self._changes_aborted = True
1548
raise errors.InconsistentDelta(old_path, file_id,
1549
'This was marked as a real delete, but the WT state'
1550
' claims that it still exists and is versioned.')
1551
del self._dirblocks[block_index][1][entry_index]
1553
if entry[1][0][0] == 'a':
1554
self._changes_aborted = True
1555
raise errors.InconsistentDelta(old_path, file_id,
1556
'The entry was considered a rename, but the source path'
1557
' is marked as absent.')
1558
# For whatever reason, we were asked to rename an entry
1559
# that was originally marked as deleted. This could be
1560
# because we are renaming the parent directory, and the WT
1561
# current state has the file marked as deleted.
1562
elif entry[1][0][0] == 'r':
1563
# implement the rename
1564
del self._dirblocks[block_index][1][entry_index]
1566
# it is being resurrected here, so blank it out temporarily.
1567
self._dirblocks[block_index][1][entry_index][1][1] = null
1569
    def _observed_sha1(self, entry, sha1, stat_value,
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
        """Note the sha1 of a file.

        :param entry: The entry the sha1 is for.
        :param sha1: The observed sha1.
        :param stat_value: The os.lstat for the file.
        """
        try:
            minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
        except KeyError:
            # Unhandled kind
            return None
        packed_stat = _pack_stat(stat_value)
        if self._cutoff_time is None:
            self._sha_cutoff_time()
        if (stat_value.st_mtime < self._cutoff_time
            and stat_value.st_ctime < self._cutoff_time):
            entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
                           packed_stat)
            self._dirblock_state = DirState.IN_MEMORY_MODIFIED

    def _sha_cutoff_time(self):
        """Return cutoff time.

        Files modified more recently than this time are at risk of being
        undetectably modified and so can't be cached.
        """
        # Cache the cutoff time as long as we hold a lock.
        # time.time() isn't super expensive (approx 3.38us), but
        # when you call it 50,000 times it adds up.
        # For comparison, os.lstat() costs 7.2us if it is hot.
        self._cutoff_time = int(time.time()) - 3
        return self._cutoff_time
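        # Illustrative example (hypothetical clock values): if time.time() is
        # 1002.0 the cutoff is 999; a file with mtime 1000.4 is considered too
        # fresh for its sha1 to be cached, while one with mtime 990.0 is safely
        # older than the cutoff and can be cached.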
1605
    def _lstat(self, abspath, entry):
        """Return the os.lstat value for this path."""
        return os.lstat(abspath)

    def _sha1_file_and_mutter(self, abspath):
        # when -Dhashcache is turned on, this is monkey-patched in to log
        # every sha1 calculation as it happens.
        trace.mutter("dirstate sha1 " + abspath)
        return self._sha1_provider.sha1(abspath)

    def _is_executable(self, mode, old_executable):
        """Is this file executable?"""
        return bool(S_IEXEC & mode)

    def _is_executable_win32(self, mode, old_executable):
        """On win32 the executable bit is stored in the dirstate."""
        return old_executable

    if sys.platform == 'win32':
        _is_executable = _is_executable_win32
1626
    def _read_link(self, abspath, old_link):
        """Read the target of a symlink."""
        # TODO: jam 20070301 On Win32, this could just return the value
        #       already in memory. However, this really needs to be done at a
        #       higher level, because there either won't be anything on disk,
        #       or the thing on disk will be a file.
        fs_encoding = osutils._fs_enc
        if isinstance(abspath, unicode):
            # abspath is defined as the path to pass to lstat. readlink is
            # buggy in python < 2.6 (it doesn't encode unicode path into FS
            # encoding), so we need to encode ourselves knowing that unicode
            # paths are produced by UnicodeDirReader on purpose.
            abspath = abspath.encode(fs_encoding)
        target = os.readlink(abspath)
        if fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
            # Change encoding if needed
            target = target.decode(fs_encoding).encode('UTF-8')
        return target

    def get_ghosts(self):
        """Return a list of the parent tree revision ids that are ghosts."""
        self._read_header_if_needed()
        return self._ghosts
1650
    def get_lines(self):
        """Serialise the entire dirstate to a sequence of lines."""
        if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
            self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
            # read what's on disk.
            self._state_file.seek(0)
            return self._state_file.readlines()
        lines = []
        lines.append(self._get_parents_line(self.get_parent_ids()))
        lines.append(self._get_ghosts_line(self._ghosts))
        # append the root line which is special cased
        lines.extend(map(self._entry_to_line, self._iter_entries()))
        return self._get_output_lines(lines)

    def _get_ghosts_line(self, ghost_ids):
        """Create a line for the state file for ghost information."""
        return '\0'.join([str(len(ghost_ids))] + ghost_ids)

    def _get_parents_line(self, parent_ids):
        """Create a line for the state file for parents information."""
        return '\0'.join([str(len(parent_ids))] + parent_ids)
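        # Illustrative example (hypothetical revision ids): two parents
        # 'rev-a' and 'rev-b' serialise to the NUL-joined string
        #   '2\0rev-a\0rev-b'
        # and an empty ghost list serialises to just '0'.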
1672
def _get_fields_to_entry(self):
1673
"""Get a function which converts entry fields into a entry record.
1675
This handles size and executable, as well as parent records.
1677
:return: A function which takes a list of fields, and returns an
1678
appropriate record for storing in memory.
1680
# This is intentionally unrolled for performance
1681
num_present_parents = self._num_present_parents()
1682
if num_present_parents == 0:
1683
def fields_to_entry_0_parents(fields, _int=int):
1684
path_name_file_id_key = (fields[0], fields[1], fields[2])
1685
return (path_name_file_id_key, [
1687
fields[3], # minikind
1688
fields[4], # fingerprint
1689
_int(fields[5]), # size
1690
fields[6] == 'y', # executable
1691
fields[7], # packed_stat or revision_id
1693
return fields_to_entry_0_parents
1694
elif num_present_parents == 1:
1695
def fields_to_entry_1_parent(fields, _int=int):
1696
path_name_file_id_key = (fields[0], fields[1], fields[2])
1697
return (path_name_file_id_key, [
1699
fields[3], # minikind
1700
fields[4], # fingerprint
1701
_int(fields[5]), # size
1702
fields[6] == 'y', # executable
1703
fields[7], # packed_stat or revision_id
1706
fields[8], # minikind
1707
fields[9], # fingerprint
1708
_int(fields[10]), # size
1709
fields[11] == 'y', # executable
1710
fields[12], # packed_stat or revision_id
1713
return fields_to_entry_1_parent
1714
elif num_present_parents == 2:
1715
def fields_to_entry_2_parents(fields, _int=int):
1716
path_name_file_id_key = (fields[0], fields[1], fields[2])
1717
return (path_name_file_id_key, [
1719
fields[3], # minikind
1720
fields[4], # fingerprint
1721
_int(fields[5]), # size
1722
fields[6] == 'y', # executable
1723
fields[7], # packed_stat or revision_id
1726
fields[8], # minikind
1727
fields[9], # fingerprint
1728
_int(fields[10]), # size
1729
fields[11] == 'y', # executable
1730
fields[12], # packed_stat or revision_id
1733
fields[13], # minikind
1734
fields[14], # fingerprint
1735
_int(fields[15]), # size
1736
fields[16] == 'y', # executable
1737
fields[17], # packed_stat or revision_id
1740
return fields_to_entry_2_parents
1742
def fields_to_entry_n_parents(fields, _int=int):
1743
path_name_file_id_key = (fields[0], fields[1], fields[2])
1744
trees = [(fields[cur], # minikind
1745
fields[cur+1], # fingerprint
1746
_int(fields[cur+2]), # size
1747
fields[cur+3] == 'y', # executable
1748
fields[cur+4], # stat or revision_id
1749
) for cur in xrange(3, len(fields)-1, 5)]
1750
return path_name_file_id_key, trees
1751
return fields_to_entry_n_parents
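        # Illustrative example (hypothetical field values): with no parents, a
        # flat field list such as
        #   ['dir', 'name.txt', 'name-id', 'f', <sha1>, '12', 'y', <packed_stat>]
        # is folded into
        #   (('dir', 'name.txt', 'name-id'),
        #    [('f', <sha1>, 12, True, <packed_stat>)])
        # with one additional 5-field details tuple appended per present parent.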
1753
    def get_parent_ids(self):
        """Return a list of the parent tree ids for the directory state."""
        self._read_header_if_needed()
        return list(self._parents)

    def _get_block_entry_index(self, dirname, basename, tree_index):
        """Get the coordinates for a path in the state structure.

        :param dirname: The utf8 dirname to lookup.
        :param basename: The utf8 basename to lookup.
        :param tree_index: The index of the tree for which this lookup should
            be attempted.
        :return: A tuple describing where the path is located, or should be
            inserted. The tuple contains four fields: the block index, the row
            index, the directory is present (boolean), the entire path is
            present (boolean). There is no guarantee that either
            coordinate is currently reachable unless the found field for it is
            True. For instance, a directory not present in the searched tree
            may be returned with a value one greater than the current highest
            block offset. The directory present field will always be True when
            the path present field is True. The directory present field does
            NOT indicate that the directory is present in the searched tree,
            rather it indicates that there are at least some files in some
            tree at that path.
        """
        self._read_dirblocks_if_needed()
        key = dirname, basename, ''
        block_index, present = self._find_block_index_from_key(key)
        if not present:
            # no such directory - return the dir index and 0 for the row.
            return block_index, 0, False, False
        block = self._dirblocks[block_index][1] # access the entries only
        entry_index, present = self._find_entry_index(key, block)
        # linear search through entries at this path to find the one
        # requested.
        while entry_index < len(block) and block[entry_index][0][1] == basename:
            if block[entry_index][1][tree_index][0] not in 'ar':
                # neither absent or relocated
                return block_index, entry_index, True, True
            entry_index += 1
        return block_index, entry_index, True, False
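        # Illustrative example (hypothetical layout): looking up
        # ('lib', 'foo.py') in a state whose block for 'lib' is at index 3 and
        # holds foo.py at row 0 would return (3, 0, True, True); if 'lib'
        # itself were unknown the result might be (4, 0, False, False), i.e.
        # the insertion point for a new block.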
1795
def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None, include_deleted=False):
1796
"""Get the dirstate entry for path in tree tree_index.
1798
If either file_id or path is supplied, it is used as the key to lookup.
1799
If both are supplied, the fastest lookup is used, and an error is
1800
raised if they do not both point at the same row.
1802
:param tree_index: The index of the tree we wish to locate this path
1803
in. If the path is present in that tree, the entry containing its
1804
details is returned, otherwise (None, None) is returned
1805
0 is the working tree, higher indexes are successive parent
1807
:param fileid_utf8: A utf8 file_id to look up.
1808
:param path_utf8: An utf8 path to be looked up.
1809
:param include_deleted: If True, and performing a lookup via
1810
fileid_utf8 rather than path_utf8, return an entry for deleted
1812
:return: The dirstate entry tuple for path, or (None, None)
1814
self._read_dirblocks_if_needed()
1815
if path_utf8 is not None:
1816
if type(path_utf8) is not str:
1817
raise AssertionError('path_utf8 is not a str: %s %s'
1818
% (type(path_utf8), path_utf8))
1819
# path lookups are faster
1820
dirname, basename = osutils.split(path_utf8)
1821
block_index, entry_index, dir_present, file_present = \
1822
self._get_block_entry_index(dirname, basename, tree_index)
1823
if not file_present:
1825
entry = self._dirblocks[block_index][1][entry_index]
1826
if not (entry[0][2] and entry[1][tree_index][0] not in ('a', 'r')):
1827
raise AssertionError('unversioned entry?')
1829
if entry[0][2] != fileid_utf8:
1830
self._changes_aborted = True
1831
raise errors.BzrError('integrity error ? : mismatching'
1832
' tree_index, file_id and path')
1835
possible_keys = self._get_id_index().get(fileid_utf8, None)
1836
if not possible_keys:
1838
for key in possible_keys:
1839
block_index, present = \
1840
self._find_block_index_from_key(key)
1841
# strange, probably indicates an out of date
1842
# id index - for now, allow this.
1845
# WARNING: DO not change this code to use _get_block_entry_index
1846
# as that function is not suitable: it does not use the key
1847
# to lookup, and thus the wrong coordinates are returned.
1848
block = self._dirblocks[block_index][1]
1849
entry_index, present = self._find_entry_index(key, block)
1851
entry = self._dirblocks[block_index][1][entry_index]
1852
if entry[1][tree_index][0] in 'fdlt':
1853
# this is the result we are looking for: the
1854
# real home of this file_id in this tree.
1856
if entry[1][tree_index][0] == 'a':
1857
# there is no home for this entry in this tree
1861
if entry[1][tree_index][0] != 'r':
1862
raise AssertionError(
1863
"entry %r has invalid minikind %r for tree %r" \
1865
entry[1][tree_index][0],
1867
real_path = entry[1][tree_index][1]
1868
return self._get_entry(tree_index, fileid_utf8=fileid_utf8,
1869
path_utf8=real_path)
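        # Illustrative usage sketch (hypothetical path; ``state`` stands for a
        # locked DirState): looking up a path in the working tree
        #   entry = state._get_entry(0, path_utf8='dir/file.txt')
        # returns ((dirname, basename, file_id), [tree0_details, ...]), or
        # (None, None) when the path is not versioned in that tree.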
1873
def initialize(cls, path, sha1_provider=None):
1874
"""Create a new dirstate on path.
1876
The new dirstate will be an empty tree - that is it has no parents,
1877
and only a root node - which has id ROOT_ID.
1879
:param path: The name of the file for the dirstate.
1880
:param sha1_provider: an object meeting the SHA1Provider interface.
1881
If None, a DefaultSHA1Provider is used.
1882
:return: A write-locked DirState object.
1884
# This constructs a new DirState object on a path, sets the _state_file
1885
# to a new empty file for that path. It then calls _set_data() with our
1886
# stock empty dirstate information - a root with ROOT_ID, no children,
1887
# and no parents. Finally it calls save() to ensure that this data will
1889
if sha1_provider is None:
1890
sha1_provider = DefaultSHA1Provider()
1891
result = cls(path, sha1_provider)
1892
# root dir and root dir contents with no children.
1893
empty_tree_dirblocks = [('', []), ('', [])]
1894
# a new root directory, with a NULLSTAT.
1895
empty_tree_dirblocks[0][1].append(
1896
(('', '', inventory.ROOT_ID), [
1897
('d', '', 0, False, DirState.NULLSTAT),
1901
result._set_data([], empty_tree_dirblocks)
1909
def _inv_entry_to_details(inv_entry):
1910
"""Convert an inventory entry (from a revision tree) to state details.
1912
:param inv_entry: An inventory entry whose sha1 and link targets can be
1913
relied upon, and which has a revision set.
1914
:return: A details tuple - the details for a single tree at a path +
1917
kind = inv_entry.kind
1918
minikind = DirState._kind_to_minikind[kind]
1919
tree_data = inv_entry.revision
1920
if kind == 'directory':
1924
elif kind == 'symlink':
1925
if inv_entry.symlink_target is None:
1928
fingerprint = inv_entry.symlink_target.encode('utf8')
1931
elif kind == 'file':
1932
fingerprint = inv_entry.text_sha1 or ''
1933
size = inv_entry.text_size or 0
1934
executable = inv_entry.executable
1935
elif kind == 'tree-reference':
1936
fingerprint = inv_entry.reference_revision or ''
1940
raise Exception("can't pack %s" % inv_entry)
1941
return (minikind, fingerprint, size, executable, tree_data)
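        # Illustrative example (hypothetical values): a 10-byte executable file
        # whose inventory entry carries sha1 'abc...' and revision 'rev-1'
        # packs to
        #   ('f', 'abc...', 10, True, 'rev-1')
        # i.e. (minikind, fingerprint, size, executable, tree_data).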
1943
def _iter_child_entries(self, tree_index, path_utf8):
1944
"""Iterate over all the entries that are children of path_utf.
1946
This only returns entries that are present (not in 'a', 'r') in
1947
tree_index. tree_index data is not refreshed, so if tree 0 is used,
1948
results may differ from that obtained if paths were statted to
1949
determine which ones were directories.
1951
Asking for the children of a non-directory will return an empty
1955
next_pending_dirs = [path_utf8]
1957
while next_pending_dirs:
1958
pending_dirs = next_pending_dirs
1959
next_pending_dirs = []
1960
for path in pending_dirs:
1961
block_index, present = self._find_block_index_from_key(
1963
if block_index == 0:
1965
if len(self._dirblocks) == 1:
1966
# asked for the children of the root with no other
1970
# children of a non-directory asked for.
1972
block = self._dirblocks[block_index]
1973
for entry in block[1]:
1974
kind = entry[1][tree_index][0]
1975
if kind not in absent:
1979
path = entry[0][0] + '/' + entry[0][1]
1982
next_pending_dirs.append(path)
1984
    def _iter_entries(self):
        """Iterate over all the entries in the dirstate.

        Each yielded item is an entry in the standard format described in the
        docstring of bzrlib.dirstate.
        """
        self._read_dirblocks_if_needed()
        for directory in self._dirblocks:
            for entry in directory[1]:
                yield entry

    def _get_id_index(self):
        """Get an id index of self._dirblocks."""
        if self._id_index is None:
            id_index = {}
            for key, tree_details in self._iter_entries():
                id_index.setdefault(key[2], set()).add(key)
            self._id_index = id_index
        return self._id_index
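        # Illustrative example (hypothetical ids and paths): after a rename
        # recorded against a basis tree, the id index might map
        #   'foo-id' -> set([('', 'foo.txt', 'foo-id'),
        #                    ('dir', 'renamed.txt', 'foo-id')])
        # i.e. every (dirname, basename, file_id) key mentioning the id in any
        # tree, which is what relocation records are resolved against.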
2004
    def _get_output_lines(self, lines):
        """Format lines for final output.

        :param lines: A sequence of lines containing the parents list and the
            path lines.
        """
        output_lines = [DirState.HEADER_FORMAT_3]
        lines.append('') # a final newline
        inventory_text = '\0\n\0'.join(lines)
        output_lines.append('crc32: %s\n' % (zlib.crc32(inventory_text),))
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
        num_entries = len(lines)-3
        output_lines.append('num_entries: %s\n' % (num_entries,))
        output_lines.append(inventory_text)
        return output_lines

    def _make_deleted_row(self, fileid_utf8, parents):
        """Return a deleted row for fileid_utf8."""
        return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,
            ''), parents

    def _num_present_parents(self):
        """The number of parent entries in each record row."""
        return len(self._parents) - len(self._ghosts)
2030
    @staticmethod
    def on_file(path, sha1_provider=None):
        """Construct a DirState on the file at path "path".

        :param path: The path at which the dirstate file on disk should live.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :return: An unlocked DirState object, associated with the given path.
        """
        if sha1_provider is None:
            sha1_provider = DefaultSHA1Provider()
        result = DirState(path, sha1_provider)
        return result
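        # Illustrative usage sketch (hypothetical path, not taken from the test
        # suite): callers are expected to lock the state before reading and to
        # always release the lock afterwards, e.g.
        #   state = DirState.on_file('.bzr/checkout/dirstate')
        #   state.lock_read()
        #   try:
        #       parents = state.get_parent_ids()
        #   finally:
        #       state.unlock()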
2043
    def _read_dirblocks_if_needed(self):
        """Read in all the dirblocks from the file if they are not in memory.

        This populates self._dirblocks, and sets self._dirblock_state to
        IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block
        loading.
        """
        self._read_header_if_needed()
        if self._dirblock_state == DirState.NOT_IN_MEMORY:
            _read_dirblocks(self)

    def _read_header(self):
        """This reads in the metadata header, and the parent ids.

        After reading in, the file should be positioned at the null
        just before the start of the first record in the file.

        :return: (expected crc checksum, number of entries, parent list)
        """
        self._read_prelude()
        parent_line = self._state_file.readline()
        info = parent_line.split('\0')
        num_parents = int(info[0])
        self._parents = info[1:-1]
        ghost_line = self._state_file.readline()
        info = ghost_line.split('\0')
        num_ghosts = int(info[1])
        self._ghosts = info[2:-1]
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._end_of_header = self._state_file.tell()

    def _read_header_if_needed(self):
        """Read the header of the dirstate file if needed."""
        # inline this as it will be called a lot
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)
        if self._header_state == DirState.NOT_IN_MEMORY:
            self._read_header()
2082
    def _read_prelude(self):
        """Read in the prelude header of the dirstate file.

        This only reads in the stuff that is not connected to the crc
        checksum. The position will be correct to read in the rest of
        the file and check the checksum after this point.
        The next entry in the file should be the number of parents,
        and their ids. Followed by a newline.
        """
        header = self._state_file.readline()
        if header != DirState.HEADER_FORMAT_3:
            raise errors.BzrError(
                'invalid header line: %r' % (header,))
        crc_line = self._state_file.readline()
        if not crc_line.startswith('crc32: '):
            raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
        self.crc_expected = int(crc_line[len('crc32: '):-1])
        num_entries_line = self._state_file.readline()
        if not num_entries_line.startswith('num_entries: '):
            raise errors.BzrError('missing num_entries line')
        self._num_entries = int(num_entries_line[len('num_entries: '):-1])
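        # Illustrative example (hypothetical values): the prelude of a state
        # file with 42 entries consists of the DirState.HEADER_FORMAT_3 line
        # followed by
        #   crc32: -1327906555
        #   num_entries: 42
        # and _read_prelude leaves the file positioned just after the
        # num_entries line.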
2104
    def sha1_from_stat(self, path, stat_result, _pack_stat=pack_stat):
        """Find a sha1 given a stat lookup."""
        return self._get_packed_stat_index().get(_pack_stat(stat_result), None)

    def _get_packed_stat_index(self):
        """Get a packed_stat index of self._dirblocks."""
        if self._packed_stat_index is None:
            index = {}
            for key, tree_details in self._iter_entries():
                if tree_details[0][0] == 'f':
                    index[tree_details[0][4]] = tree_details[0][1]
            self._packed_stat_index = index
        return self._packed_stat_index
2119
"""Save any pending changes created during this session.
2121
We reuse the existing file, because that prevents race conditions with
2122
file creation, and use oslocks on it to prevent concurrent modification
2123
and reads - because dirstate's incremental data aggregation is not
2124
compatible with reading a modified file, and replacing a file in use by
2125
another process is impossible on Windows.
2127
A dirstate in read only mode should be smart enough though to validate
2128
that the file has not changed, and otherwise discard its cache and
2129
start over, to allow for fine grained read lock duration, so 'status'
2130
wont block 'commit' - for example.
2132
if self._changes_aborted:
2133
# Should this be a warning? For now, I'm expecting that places that
2134
# mark it inconsistent will warn, making a warning here redundant.
2135
trace.mutter('Not saving DirState because '
2136
'_changes_aborted is set.')
2138
if (self._header_state == DirState.IN_MEMORY_MODIFIED or
2139
self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
2141
grabbed_write_lock = False
2142
if self._lock_state != 'w':
2143
grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
2144
# Switch over to the new lock, as the old one may be closed.
2145
# TODO: jam 20070315 We should validate the disk file has
2146
# not changed contents. Since temporary_write_lock may
2147
# not be an atomic operation.
2148
self._lock_token = new_lock
2149
self._state_file = new_lock.f
2150
if not grabbed_write_lock:
2151
# We couldn't grab a write lock, so we switch back to a read one
2154
self._state_file.seek(0)
2155
self._state_file.writelines(self.get_lines())
2156
self._state_file.truncate()
2157
self._state_file.flush()
2158
self._header_state = DirState.IN_MEMORY_UNMODIFIED
2159
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
2161
if grabbed_write_lock:
2162
self._lock_token = self._lock_token.restore_read_lock()
2163
self._state_file = self._lock_token.f
2164
# TODO: jam 20070315 We should validate the disk file has
2165
# not changed contents. Since restore_read_lock may
2166
# not be an atomic operation.
2168
    def _set_data(self, parent_ids, dirblocks):
        """Set the full dirstate data in memory.

        This is an internal function used to completely replace the objects
        in memory state. It puts the dirstate into state 'full-dirty'.

        :param parent_ids: A list of parent tree revision ids.
        :param dirblocks: A list containing one tuple for each directory in the
            tree. Each tuple contains the directory path and a list of entries
            found in that directory.
        """
        # our memory copy is now authoritative.
        self._dirblocks = dirblocks
        self._header_state = DirState.IN_MEMORY_MODIFIED
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._parents = list(parent_ids)
        self._id_index = None
        self._packed_stat_index = None
2187
def set_path_id(self, path, new_id):
2188
"""Change the id of path to new_id in the current working tree.
2190
:param path: The path inside the tree to set - '' is the root, 'foo'
2191
is the path foo in the root.
2192
:param new_id: The new id to assign to the path. This must be a utf8
2193
file id (not unicode, and not None).
2195
self._read_dirblocks_if_needed()
2197
# TODO: logic not written
2198
raise NotImplementedError(self.set_path_id)
2199
# TODO: check new id is unique
2200
entry = self._get_entry(0, path_utf8=path)
2201
if entry[0][2] == new_id:
2202
# Nothing to change.
2204
# mark the old path absent, and insert a new root path
2205
self._make_absent(entry)
2206
self.update_minimal(('', '', new_id), 'd',
2207
path_utf8='', packed_stat=entry[1][0][4])
2208
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2209
if self._id_index is not None:
2210
self._id_index.setdefault(new_id, set()).add(entry[0])
2212
def set_parent_trees(self, trees, ghosts):
2213
"""Set the parent trees for the dirstate.
2215
:param trees: A list of revision_id, tree tuples. tree must be provided
2216
even if the revision_id refers to a ghost: supply an empty tree in
2218
:param ghosts: A list of the revision_ids that are ghosts at the time
2221
# TODO: generate a list of parent indexes to preserve to save
2222
# processing specific parent trees. In the common case one tree will
2223
# be preserved - the left most parent.
2224
# TODO: if the parent tree is a dirstate, we might want to walk them
2225
# all by path in parallel for 'optimal' common-case performance.
2226
# generate new root row.
2227
self._read_dirblocks_if_needed()
2228
# TODO future sketch: Examine the existing parents to generate a change
2229
# map and then walk the new parent trees only, mapping them into the
2230
# dirstate. Walk the dirstate at the same time to remove unreferenced
2233
# sketch: loop over all entries in the dirstate, cherry picking
2234
# entries from the parent trees, if they are not ghost trees.
2235
# after we finish walking the dirstate, all entries not in the dirstate
2236
# are deletes, so we want to append them to the end as per the design
2237
# discussions. So do a set difference on ids with the parents to
2238
# get deletes, and add them to the end.
2239
# During the update process we need to answer the following questions:
2240
# - find other keys containing a fileid in order to create cross-path
2241
# links. We don't trivially use the inventory from other trees
2242
# because this leads to either double touching, or to accessing
2244
# - find other keys containing a path
2245
# We accumulate each entry via this dictionary, including the root
2248
# we could do parallel iterators, but because file id data may be
2249
# scattered throughout, we don't save on index overhead: we have to look
2250
# at everything anyway. We can probably save cycles by reusing parent
2251
# data and doing an incremental update when adding an additional
2252
# parent, but for now the common cases are adding a new parent (merge),
2253
# and replacing completely (commit), and commit is more common: so
2254
# optimise merge later.
2256
# ---- start generation of full tree mapping data
2257
# what trees should we use?
2258
parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
2259
# how many trees do we end up with
2260
parent_count = len(parent_trees)
2262
# one: the current tree
2263
for entry in self._iter_entries():
2264
# skip entries not in the current tree
2265
if entry[1][0][0] in 'ar': # absent, relocated
2267
by_path[entry[0]] = [entry[1][0]] + \
2268
[DirState.NULL_PARENT_DETAILS] * parent_count
2269
id_index[entry[0][2]] = set([entry[0]])
2271
# now the parent trees:
2272
for tree_index, tree in enumerate(parent_trees):
2273
# the index is off by one, adjust it.
2274
tree_index = tree_index + 1
2275
# when we add new locations for a fileid we need these ranges for
2276
# any fileid in this tree as we set the by_path[id] to:
2277
# already_processed_tree_details + new_details + new_location_suffix
2278
# the suffix is from tree_index+1:parent_count+1.
2279
new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
2280
# now stitch in all the entries from this tree
2281
for path, entry in tree.inventory.iter_entries_by_dir():
2282
# here we process each trees details for each item in the tree.
2283
# we first update any existing entries for the id at other paths,
2284
# then we either create or update the entry for the id at the
2285
# right path, and finally we add (if needed) a mapping from
2286
# file_id to this path. We do it in this order to allow us to
2287
# avoid checking all known paths for the id when generating a
2288
# new entry at this path: by adding the id->path mapping last,
2289
# all the mappings are valid and have correct relocation
2290
# records where needed.
2291
file_id = entry.file_id
2292
path_utf8 = path.encode('utf8')
2293
dirname, basename = osutils.split(path_utf8)
2294
new_entry_key = (dirname, basename, file_id)
2295
# tree index consistency: All other paths for this id in this tree
2296
# index must point to the correct path.
2297
for entry_key in id_index.setdefault(file_id, set()):
2298
# TODO:PROFILING: It might be faster to just update
2299
# rather than checking if we need to, and then overwrite
2300
# the one we are located at.
2301
if entry_key != new_entry_key:
2302
# this file id is at a different path in one of the
2303
# other trees, so put absent pointers there
2304
# This is the vertical axis in the matrix, all pointing
2306
by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2307
# by path consistency: Insert into an existing path record (trivial), or
2308
# add a new one with relocation pointers for the other tree indexes.
2309
if new_entry_key in id_index[file_id]:
2310
# there is already an entry where this data belongs, just insert it.
2311
by_path[new_entry_key][tree_index] = \
2312
self._inv_entry_to_details(entry)
2314
# add relocated entries to the horizontal axis - this row
2315
# mapping from path,id. We need to look up the correct path
2316
# for the indexes from 0 to tree_index -1
2318
for lookup_index in xrange(tree_index):
2319
# boundary case: this is the first occurrence of file_id
2320
# so there are no id_indexs, possibly take this out of
2322
if not len(id_index[file_id]):
2323
new_details.append(DirState.NULL_PARENT_DETAILS)
2325
# grab any one entry, use it to find the right path.
2326
# TODO: optimise this to reduce memory use in highly
2327
# fragmented situations by reusing the relocation
2329
a_key = iter(id_index[file_id]).next()
2330
if by_path[a_key][lookup_index][0] in ('r', 'a'):
2331
# it's a pointer or missing statement, use it as is.
2332
new_details.append(by_path[a_key][lookup_index])
2334
# we have the right key, make a pointer to it.
2335
real_path = ('/'.join(a_key[0:2])).strip('/')
2336
new_details.append(('r', real_path, 0, False, ''))
2337
new_details.append(self._inv_entry_to_details(entry))
2338
new_details.extend(new_location_suffix)
2339
by_path[new_entry_key] = new_details
2340
id_index[file_id].add(new_entry_key)
2341
# --- end generation of full tree mappings
2343
# sort and output all the entries
2344
new_entries = self._sort_entries(by_path.items())
2345
self._entries_to_current_state(new_entries)
2346
self._parents = [rev_id for rev_id, tree in trees]
2347
self._ghosts = list(ghosts)
2348
self._header_state = DirState.IN_MEMORY_MODIFIED
2349
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2350
self._id_index = id_index
2352
    def _sort_entries(self, entry_list):
        """Given a list of entries, sort them into the right order.

        This is done when constructing a new dirstate from trees - normally we
        try to keep everything in sorted blocks all the time, but sometimes
        it's easier to sort after the fact.
        """
        def _key(entry):
            # sort by: directory parts, file name, file id
            return entry[0][0].split('/'), entry[0][1], entry[0][2]
        return sorted(entry_list, key=_key)
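        # Illustrative example (hypothetical keys): splitting the dirname into
        # its parts makes 'a-b' sort after 'a/b', because ['a', 'b'] compares
        # before ['a-b']; plain string comparison of the joined paths would
        # order them the other way around.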
2364
def set_state_from_inventory(self, new_inv):
2365
"""Set new_inv as the current state.
2367
This API is called by tree transform, and will usually occur with
2368
existing parent trees.
2370
:param new_inv: The inventory object to set current state from.
2372
if 'evil' in debug.debug_flags:
2373
trace.mutter_callsite(1,
2374
"set_state_from_inventory called; please mutate the tree instead")
2375
self._read_dirblocks_if_needed()
2377
# Two iterators: current data and new data, both in dirblock order.
2378
# We zip them together, which tells about entries that are new in the
2379
# inventory, or removed in the inventory, or present in both and
2382
# You might think we could just synthesize a new dirstate directly
2383
# since we're processing it in the right order. However, we need to
2384
# also consider there may be any number of parent trees and relocation
2385
# pointers, and we don't want to duplicate that here.
2386
new_iterator = new_inv.iter_entries_by_dir()
2387
# we will be modifying the dirstate, so we need a stable iterator. In
2388
# future we might write one, for now we just clone the state into a
2389
# list - which is a shallow copy.
2390
old_iterator = iter(list(self._iter_entries()))
2391
# both must have roots so this is safe:
2392
current_new = new_iterator.next()
2393
current_old = old_iterator.next()
2394
def advance(iterator):
2396
return iterator.next()
2397
except StopIteration:
2399
while current_new or current_old:
2400
# skip entries in old that are not really there
2401
if current_old and current_old[1][0][0] in 'ar':
2402
# relocated or absent
2403
current_old = advance(old_iterator)
2406
# convert new into dirblock style
2407
new_path_utf8 = current_new[0].encode('utf8')
2408
new_dirname, new_basename = osutils.split(new_path_utf8)
2409
new_id = current_new[1].file_id
2410
new_entry_key = (new_dirname, new_basename, new_id)
2411
current_new_minikind = \
2412
DirState._kind_to_minikind[current_new[1].kind]
2413
if current_new_minikind == 't':
2414
fingerprint = current_new[1].reference_revision or ''
2416
# We normally only insert or remove records, or update
2417
# them when it has significantly changed. Then we want to
2418
# erase its fingerprint. Unaffected records should
2419
# normally not be updated at all.
2422
# for safety disable variables
2423
new_path_utf8 = new_dirname = new_basename = new_id = \
2424
new_entry_key = None
2425
# 5 cases, we don't have a value that is strictly greater than everything, so
2426
# we make both end conditions explicit
2428
# old is finished: insert current_new into the state.
2429
self.update_minimal(new_entry_key, current_new_minikind,
2430
executable=current_new[1].executable,
2431
path_utf8=new_path_utf8, fingerprint=fingerprint)
2432
current_new = advance(new_iterator)
2433
elif not current_new:
2435
self._make_absent(current_old)
2436
current_old = advance(old_iterator)
2437
elif new_entry_key == current_old[0]:
2438
# same - common case
2439
# We're looking at the same path and id in both the dirstate
2440
# and inventory, so just need to update the fields in the
2441
# dirstate from the one in the inventory.
2442
# TODO: update the record if anything significant has changed.
2443
# the minimal required trigger is if the execute bit or cached
2445
if (current_old[1][0][3] != current_new[1].executable or
2446
current_old[1][0][0] != current_new_minikind):
2447
self.update_minimal(current_old[0], current_new_minikind,
2448
executable=current_new[1].executable,
2449
path_utf8=new_path_utf8, fingerprint=fingerprint)
2450
# both sides are dealt with, move on
2451
current_old = advance(old_iterator)
2452
current_new = advance(new_iterator)
2453
elif (cmp_by_dirs(new_dirname, current_old[0][0]) < 0
2454
or (new_dirname == current_old[0][0]
2455
and new_entry_key[1:] < current_old[0][1:])):
2457
# add a entry for this and advance new
2458
self.update_minimal(new_entry_key, current_new_minikind,
2459
executable=current_new[1].executable,
2460
path_utf8=new_path_utf8, fingerprint=fingerprint)
2461
current_new = advance(new_iterator)
2463
# we've advanced past the place where the old key would be,
2464
# without seeing it in the new list. so it must be gone.
2465
self._make_absent(current_old)
2466
current_old = advance(old_iterator)
2467
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2468
self._id_index = None
2469
self._packed_stat_index = None
2471
def _make_absent(self, current_old):
2472
"""Mark current_old - an entry - as absent for tree 0.
2474
:return: True if this was the last details entry for the entry key:
2475
that is, if the underlying block has had the entry removed, thus
2476
shrinking in length.
2478
# build up paths that this id will be left at after the change is made,
2479
# so we can update their cross references in tree 0
2480
all_remaining_keys = set()
2481
# Don't check the working tree, because it's going.
2482
for details in current_old[1][1:]:
2483
if details[0] not in 'ar': # absent, relocated
2484
all_remaining_keys.add(current_old[0])
2485
elif details[0] == 'r': # relocated
2486
# record the key for the real path.
2487
all_remaining_keys.add(tuple(osutils.split(details[1])) + (current_old[0][2],))
2488
# absent rows are not present at any path.
2489
last_reference = current_old[0] not in all_remaining_keys
2491
# the current row consists entirely of the current item (being marked
2492
# absent), and relocated or absent entries for the other trees:
2493
# Remove it, it's meaningless.
2494
block = self._find_block(current_old[0])
2495
entry_index, present = self._find_entry_index(current_old[0], block[1])
2497
raise AssertionError('could not find entry for %s' % (current_old,))
2498
block[1].pop(entry_index)
2499
# if we have an id_index in use, remove this key from it for this id.
2500
if self._id_index is not None:
2501
self._id_index[current_old[0][2]].remove(current_old[0])
2502
# update all remaining keys for this id to record it as absent. The
2503
# existing details may either be the record we are marking as deleted
2504
# (if there were other trees with the id present at this path), or may
2506
for update_key in all_remaining_keys:
2507
update_block_index, present = \
2508
self._find_block_index_from_key(update_key)
2510
raise AssertionError('could not find block for %s' % (update_key,))
2511
update_entry_index, present = \
2512
self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
2514
raise AssertionError('could not find entry for %s' % (update_key,))
2515
update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
2516
# it must not be absent at the moment
2517
if update_tree_details[0][0] == 'a': # absent
2518
raise AssertionError('bad row %r' % (update_tree_details,))
2519
update_tree_details[0] = DirState.NULL_PARENT_DETAILS
2520
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2521
return last_reference
2523
def update_minimal(self, key, minikind, executable=False, fingerprint='',
2524
packed_stat=None, size=0, path_utf8=None):
2525
"""Update an entry to the state in tree 0.
2527
This will either create a new entry at 'key' or update an existing one.
2528
It also makes sure that any other records which might mention this are
2531
:param key: (dir, name, file_id) for the new entry
2532
:param minikind: The type for the entry ('f' == 'file', 'd' ==
2534
:param executable: Should the executable bit be set?
2535
:param fingerprint: Simple fingerprint for new entry: canonical-form
2536
sha1 for files, referenced revision id for subtrees, etc.
2537
:param packed_stat: Packed stat value for new entry.
2538
:param size: Size information for new entry
2539
:param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2542
If packed_stat and fingerprint are not given, they're invalidated in
2545
block = self._find_block(key)[1]
2546
if packed_stat is None:
2547
packed_stat = DirState.NULLSTAT
2548
# XXX: Some callers pass '' as the packed_stat, and it seems to be
2549
# sometimes present in the dirstate - this seems oddly inconsistent.
2551
entry_index, present = self._find_entry_index(key, block)
2552
new_details = (minikind, fingerprint, size, executable, packed_stat)
2553
id_index = self._get_id_index()
2555
# new entry, synthesis cross reference here,
2556
existing_keys = id_index.setdefault(key[2], set())
2557
if not existing_keys:
2558
# not currently in the state, simplest case
2559
new_entry = key, [new_details] + self._empty_parent_info()
2561
# present at one or more existing other paths.
2562
# grab one of them and use it to generate parent
2563
# relocation/absent entries.
2564
new_entry = key, [new_details]
2565
for other_key in existing_keys:
2566
# change the record at other to be a pointer to this new
2567
# record. The loop looks similar to the change to
2568
# relocations when updating an existing record but it's not:
2569
# the test for existing kinds is different: this can be
2570
# factored out to a helper though.
2571
other_block_index, present = self._find_block_index_from_key(other_key)
2573
raise AssertionError('could not find block for %s' % (other_key,))
2574
other_entry_index, present = self._find_entry_index(other_key,
2575
self._dirblocks[other_block_index][1])
2577
raise AssertionError('could not find entry for %s' % (other_key,))
2578
if path_utf8 is None:
2579
raise AssertionError('no path')
2580
self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2581
('r', path_utf8, 0, False, '')
2583
num_present_parents = self._num_present_parents()
2584
for lookup_index in xrange(1, num_present_parents + 1):
2585
# grab any one entry, use it to find the right path.
2586
# TODO: optimise this to reduce memory use in highly
2587
# fragmented situations by reusing the relocation
2589
update_block_index, present = \
2590
self._find_block_index_from_key(other_key)
2592
raise AssertionError('could not find block for %s' % (other_key,))
2593
update_entry_index, present = \
2594
self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2596
raise AssertionError('could not find entry for %s' % (other_key,))
2597
update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2598
if update_details[0] in 'ar': # relocated, absent
2599
# it's a pointer or absent in lookup_index's tree, use
2601
new_entry[1].append(update_details)
2603
# we have the right key, make a pointer to it.
2604
pointer_path = osutils.pathjoin(*other_key[0:2])
2605
new_entry[1].append(('r', pointer_path, 0, False, ''))
2606
block.insert(entry_index, new_entry)
2607
existing_keys.add(key)
2609
# Does the new state matter?
2610
block[entry_index][1][0] = new_details
2611
# parents cannot be affected by what we do.
2612
# other occurrences of this id can be found
2613
# from the id index.
2615
# tree index consistency: All other paths for this id in this tree
2616
# index must point to the correct path. We have to loop here because
2617
# we may have passed entries in the state with this file id already
2618
# that were absent - where parent entries are - and they need to be
2619
# converted to relocated.
2620
if path_utf8 is None:
2621
raise AssertionError('no path')
2622
for entry_key in id_index.setdefault(key[2], set()):
2623
# TODO:PROFILING: It might be faster to just update
2624
# rather than checking if we need to, and then overwrite
2625
# the one we are located at.
2626
if entry_key != key:
2627
# this file id is at a different path in one of the
2628
# other trees, so put absent pointers there
2629
# This is the vertical axis in the matrix, all pointing
2631
block_index, present = self._find_block_index_from_key(entry_key)
2633
raise AssertionError('not present: %r', entry_key)
2634
entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
2636
raise AssertionError('not present: %r', entry_key)
2637
self._dirblocks[block_index][1][entry_index][1][0] = \
2638
('r', path_utf8, 0, False, '')
2639
# add a containing dirblock if needed.
2640
if new_details[0] == 'd':
2641
subdir_key = (osutils.pathjoin(*key[0:2]), '', '')
2642
block_index, present = self._find_block_index_from_key(subdir_key)
2644
self._dirblocks.insert(block_index, (subdir_key[0], []))
2646
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2648
def _validate(self):
2649
"""Check that invariants on the dirblock are correct.
2651
This can be useful in debugging; it shouldn't be necessary in
2654
This must be called with a lock held.
2656
# NOTE: This must always raise AssertionError not just assert,
2657
# otherwise it may not behave properly under python -O
2659
# TODO: All entries must have some content that's not 'a' or 'r',
2660
# otherwise it could just be removed.
2662
# TODO: All relocations must point directly to a real entry.
2664
# TODO: No repeated keys.
2667
from pprint import pformat
2668
self._read_dirblocks_if_needed()
2669
if len(self._dirblocks) > 0:
2670
if not self._dirblocks[0][0] == '':
2671
raise AssertionError(
2672
"dirblocks don't start with root block:\n" + \
2673
pformat(self._dirblocks))
2674
if len(self._dirblocks) > 1:
2675
if not self._dirblocks[1][0] == '':
2676
raise AssertionError(
2677
"dirblocks missing root directory:\n" + \
2678
pformat(self._dirblocks))
2679
# the dirblocks are sorted by their path components, name, and dir id
2680
dir_names = [d[0].split('/')
2681
for d in self._dirblocks[1:]]
2682
if dir_names != sorted(dir_names):
2683
raise AssertionError(
2684
"dir names are not in sorted order:\n" + \
2685
pformat(self._dirblocks) + \
2688
for dirblock in self._dirblocks:
2689
# within each dirblock, the entries are sorted by filename and
2691
for entry in dirblock[1]:
2692
if dirblock[0] != entry[0][0]:
2693
raise AssertionError(
2695
"doesn't match directory name in\n%r" %
2696
(entry, pformat(dirblock)))
2697
if dirblock[1] != sorted(dirblock[1]):
2698
raise AssertionError(
2699
"dirblock for %r is not sorted:\n%s" % \
2700
(dirblock[0], pformat(dirblock)))
2702
def check_valid_parent():
2703
"""Check that the current entry has a valid parent.
2705
This makes sure that the parent has a record,
2706
and that the parent isn't marked as "absent" in the
2707
current tree. (It is invalid to have a non-absent file in an absent
2710
if entry[0][0:2] == ('', ''):
2711
# There should be no parent for the root row
2713
parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])
2714
if parent_entry == (None, None):
2715
raise AssertionError(
2716
"no parent entry for: %s in tree %s"
2717
% (this_path, tree_index))
2718
if parent_entry[1][tree_index][0] != 'd':
2719
raise AssertionError(
2720
"Parent entry for %s is not marked as a valid"
2721
" directory. %s" % (this_path, parent_entry,))
2723
# For each file id, for each tree: either
2724
# the file id is not present at all; all rows with that id in the
2725
# key have it marked as 'absent'
2726
# OR the file id is present under exactly one name; any other entries
2727
# that mention that id point to the correct name.
2729
# We check this with a dict per tree pointing either to the present
2730
# name, or None if absent.
2731
tree_count = self._num_present_parents() + 1
2732
id_path_maps = [dict() for i in range(tree_count)]
2733
# Make sure that all renamed entries point to the correct location.
2734
for entry in self._iter_entries():
2735
file_id = entry[0][2]
2736
this_path = osutils.pathjoin(entry[0][0], entry[0][1])
2737
if len(entry[1]) != tree_count:
2738
raise AssertionError(
2739
"wrong number of entry details for row\n%s" \
2740
",\nexpected %d" % \
2741
(pformat(entry), tree_count))
2742
absent_positions = 0
2743
for tree_index, tree_state in enumerate(entry[1]):
2744
this_tree_map = id_path_maps[tree_index]
2745
minikind = tree_state[0]
2746
if minikind in 'ar':
2747
absent_positions += 1
2748
# have we seen this id before in this column?
2749
if file_id in this_tree_map:
2750
previous_path, previous_loc = this_tree_map[file_id]
2751
# any later mention of this file must be consistent with
2752
# what was said before
2754
if previous_path is not None:
2755
raise AssertionError(
2756
"file %s is absent in row %r but also present " \
2758
(file_id, entry, previous_path))
2759
elif minikind == 'r':
2760
target_location = tree_state[1]
2761
if previous_path != target_location:
2762
raise AssertionError(
2763
"file %s relocation in row %r but also at %r" \
2764
% (file_id, entry, previous_path))
2766
# a file, directory, etc - may have been previously
2767
# pointed to by a relocation, which must point here
2768
if previous_path != this_path:
2769
raise AssertionError(
2770
"entry %r inconsistent with previous path %r "
2772
(entry, previous_path, previous_loc))
2773
check_valid_parent()
2776
# absent; should not occur anywhere else
2777
this_tree_map[file_id] = None, this_path
2778
elif minikind == 'r':
2779
# relocation, must occur at expected location
2780
this_tree_map[file_id] = tree_state[1], this_path
2782
this_tree_map[file_id] = this_path, this_path
2783
check_valid_parent()
2784
if absent_positions == tree_count:
2785
raise AssertionError(
2786
"entry %r has no data for any tree." % (entry,))
2788
def _wipe_state(self):
2789
"""Forget all state information about the dirstate."""
2790
self._header_state = DirState.NOT_IN_MEMORY
2791
self._dirblock_state = DirState.NOT_IN_MEMORY
2792
self._changes_aborted = False
2795
self._dirblocks = []
2796
self._id_index = None
2797
self._packed_stat_index = None
2798
self._end_of_header = None
2799
self._cutoff_time = None
2800
self._split_path_cache = {}
2802
def lock_read(self):
2803
"""Acquire a read lock on the dirstate."""
2804
if self._lock_token is not None:
2805
raise errors.LockContention(self._lock_token)
2806
# TODO: jam 20070301 Rather than wiping completely, if the blocks are
2807
# already in memory, we could read just the header and check for
2808
# any modification. If not modified, we can just leave things
2810
self._lock_token = lock.ReadLock(self._filename)
2811
self._lock_state = 'r'
2812
self._state_file = self._lock_token.f
2815
def lock_write(self):
2816
"""Acquire a write lock on the dirstate."""
2817
if self._lock_token is not None:
2818
raise errors.LockContention(self._lock_token)
2819
# TODO: jam 20070301 Rather than wiping completely, if the blocks are
2820
# already in memory, we could read just the header and check for
2821
# any modification. If not modified, we can just leave things
2823
self._lock_token = lock.WriteLock(self._filename)
2824
self._lock_state = 'w'
2825
self._state_file = self._lock_token.f
2829
"""Drop any locks held on the dirstate."""
2830
if self._lock_token is None:
2831
raise errors.LockNotHeld(self)
2832
# TODO: jam 20070301 Rather than wiping completely, if the blocks are
2833
# already in memory, we could read just the header and check for
2834
# any modification. If not modified, we can just leave things
2836
self._state_file = None
2837
self._lock_state = None
2838
self._lock_token.unlock()
2839
self._lock_token = None
2840
self._split_path_cache = {}
2842
def _requires_lock(self):
2843
"""Check that a lock is currently held by someone on the dirstate."""
2844
if not self._lock_token:
2845
raise errors.ObjectNotLocked(self)
2848
def py_update_entry(state, entry, abspath, stat_value,
2849
_stat_to_minikind=DirState._stat_to_minikind,
2850
_pack_stat=pack_stat):
2851
"""Update the entry based on what is actually on disk.
2853
This function only calculates the sha if it needs to - if the entry is
2854
uncachable, or clearly different to the first parent's entry, no sha
2855
is calculated, and None is returned.
2857
:param state: The dirstate this entry is in.
2858
:param entry: This is the dirblock entry for the file in question.
2859
:param abspath: The path on disk for this file.
2860
:param stat_value: The stat value done on the path.
2861
:return: None, or The sha1 hexdigest of the file (40 bytes) or link
2862
target of a symlink.
2865
minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
2869
packed_stat = _pack_stat(stat_value)
2870
(saved_minikind, saved_link_or_sha1, saved_file_size,
2871
saved_executable, saved_packed_stat) = entry[1][0]
2873
if minikind == 'd' and saved_minikind == 't':
2875
if (minikind == saved_minikind
2876
and packed_stat == saved_packed_stat):
2877
# The stat hasn't changed since we saved, so we can re-use the
2882
# size should also be in packed_stat
2883
if saved_file_size == stat_value.st_size:
2884
return saved_link_or_sha1
2886
# If we have gotten this far, that means that we need to actually
2887
# process this entry.
2890
executable = state._is_executable(stat_value.st_mode,
2892
if state._cutoff_time is None:
2893
state._sha_cutoff_time()
2894
if (stat_value.st_mtime < state._cutoff_time
2895
and stat_value.st_ctime < state._cutoff_time
2896
and len(entry[1]) > 1
2897
and entry[1][1][0] != 'a'):
2898
# Could check for size changes for further optimised
2899
# avoidance of sha1's. However the most prominent case of
2900
# over-shaing is during initial add, which this catches.
2901
# Besides, if content filtering happens, size and sha
2902
# are calculated at the same time, so checking just the size
2903
# gains nothing w.r.t. performance.
2904
link_or_sha1 = state._sha1_file(abspath)
2905
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
2906
executable, packed_stat)
2908
entry[1][0] = ('f', '', stat_value.st_size,
2909
executable, DirState.NULLSTAT)
2910
elif minikind == 'd':
2912
entry[1][0] = ('d', '', 0, False, packed_stat)
2913
if saved_minikind != 'd':
2914
# This changed from something into a directory. Make sure we
2915
# have a directory block for it. This doesn't happen very
2916
# often, so this doesn't have to be super fast.
2917
block_index, entry_index, dir_present, file_present = \
2918
state._get_block_entry_index(entry[0][0], entry[0][1], 0)
2919
state._ensure_block(block_index, entry_index,
2920
osutils.pathjoin(entry[0][0], entry[0][1]))
2921
elif minikind == 'l':
2922
link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
2923
if state._cutoff_time is None:
2924
state._sha_cutoff_time()
2925
if (stat_value.st_mtime < state._cutoff_time
2926
and stat_value.st_ctime < state._cutoff_time):
2927
entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
2930
entry[1][0] = ('l', '', stat_value.st_size,
2931
False, DirState.NULLSTAT)
2932
state._dirblock_state = DirState.IN_MEMORY_MODIFIED
2934
update_entry = py_update_entry
2937
class ProcessEntryPython(object):
2939
__slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
2940
"last_source_parent", "last_target_parent", "include_unchanged",
2941
"use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2942
"search_specific_files", "state", "source_index", "target_index",
2943
"want_unversioned", "tree"]
2945
def __init__(self, include_unchanged, use_filesystem_for_exec,
2946
search_specific_files, state, source_index, target_index,
2947
want_unversioned, tree):
2948
self.old_dirname_to_file_id = {}
2949
self.new_dirname_to_file_id = {}
2950
# Just a sentry, so that _process_entry can say that this
2951
# record is handled, but isn't interesting to process (unchanged)
2952
self.uninteresting = object()
2953
# Using a list so that we can access the values and change them in
2954
# nested scope. Each one is [path, file_id, entry]
2955
self.last_source_parent = [None, None]
2956
self.last_target_parent = [None, None]
2957
self.include_unchanged = include_unchanged
2958
self.use_filesystem_for_exec = use_filesystem_for_exec
2959
self.utf8_decode = cache_utf8._utf8_decode
2960
# for all search_indexs in each path at or under each element of
2961
# search_specific_files, if the detail is relocated: add the id, and add the
2962
# relocated path as one to search if its not searched already. If the
2963
# detail is not relocated, add the id.
2964
self.searched_specific_files = set()
2965
self.search_specific_files = search_specific_files
2967
self.source_index = source_index
2968
self.target_index = target_index
2969
self.want_unversioned = want_unversioned
2972
def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):
2973
"""Compare an entry and real disk to generate delta information.
2975
:param path_info: top_relpath, basename, kind, lstat, abspath for
2976
the path of entry. If None, then the path is considered absent.
2977
(Perhaps we should pass in a concrete entry for this ?)
2978
Basename is returned as a utf8 string because we expect this
2979
tuple will be ignored, and don't want to take the time to
2981
:return: None if these don't match
2982
A tuple of information about the change, or
2983
the object 'uninteresting' if these match, but are
2984
basically identical.
2986
if self.source_index is None:
2987
source_details = DirState.NULL_PARENT_DETAILS
2989
source_details = entry[1][self.source_index]
2990
target_details = entry[1][self.target_index]
2991
target_minikind = target_details[0]
2992
if path_info is not None and target_minikind in 'fdlt':
2993
if not (self.target_index == 0):
2994
raise AssertionError()
2995
link_or_sha1 = update_entry(self.state, entry,
2996
abspath=path_info[4], stat_value=path_info[3])
2997
# The entry may have been modified by update_entry
2998
target_details = entry[1][self.target_index]
2999
target_minikind = target_details[0]
3002
        file_id = entry[0][2]
        source_minikind = source_details[0]
        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
            # claimed content in both: diff
            #   r    | fdlt   |      | add source to search, add id path move and perform
            #        |        |      | diff check on source-target
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
            #        |        |      | ???
            if source_minikind in 'r':
                # add the source to the search path to find any children it
                # has.  TODO ? : only add if it is a container ?
                if not osutils.is_inside_any(self.searched_specific_files,
                                             source_details[1]):
                    self.search_specific_files.add(source_details[1])
                # generate the old path; this is needed for stating later
                # as well.
                old_path = source_details[1]
                old_dirname, old_basename = os.path.split(old_path)
                path = pathjoin(entry[0][0], entry[0][1])
                old_entry = self.state._get_entry(self.source_index,
                                                  path_utf8=old_path)
                # update the source details variable to be the real
                # location.
                if old_entry == (None, None):
                    raise errors.CorruptDirstate(self.state._filename,
                        "entry '%s/%s' is considered renamed from %r"
                        " but source does not exist\n"
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
                source_details = old_entry[1][self.source_index]
                source_minikind = source_details[0]
            else:
                old_dirname = entry[0][0]
                old_basename = entry[0][1]
                old_path = path = None
            if path_info is None:
                # the file is missing on disk, show as removed.
                content_change = True
                target_kind = None
                target_exec = False
            else:
                # source and target are both versioned and disk file is present.
                target_kind = path_info[2]
                if target_kind == 'directory':
                    if path is None:
                        old_path = path = pathjoin(old_dirname, old_basename)
                    self.new_dirname_to_file_id[path] = file_id
                    if source_minikind != 'd':
                        content_change = True
                    else:
                        # directories have no fingerprint
                        content_change = False
                    target_exec = False
                elif target_kind == 'file':
                    if source_minikind != 'f':
                        content_change = True
                    else:
                        # If the size is the same, check the sha:
                        if target_details[2] == source_details[2]:
                            if link_or_sha1 is None:
                                # Stat cache miss: compute the sha1 now.
                                statvalue, link_or_sha1 = \
                                    self.state._sha1_provider.stat_and_sha1(
                                        path_info[4])
                                self.state._observed_sha1(entry, link_or_sha1,
                                    statvalue)
                            content_change = (link_or_sha1 != source_details[1])
                        else:
                            # Size changed, so must be different
                            content_change = True
                    # Target details is updated at update_entry time
                    if self.use_filesystem_for_exec:
                        # We don't need S_ISREG here, because we are sure
                        # we are dealing with a file.
                        target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
                    else:
                        target_exec = target_details[3]
                elif target_kind == 'symlink':
                    if source_minikind != 'l':
                        content_change = True
                    else:
                        content_change = (link_or_sha1 != source_details[1])
                    target_exec = False
                elif target_kind == 'tree-reference':
                    if source_minikind != 't':
                        content_change = True
                    else:
                        content_change = False
                    target_exec = False
                else:
                    raise Exception("unknown kind %s" % path_info[2])
            if source_minikind == 'd':
                if path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                self.old_dirname_to_file_id[old_path] = file_id
            # parent id is the entry for the path in the target tree
            if old_dirname == self.last_source_parent[0]:
                source_parent_id = self.last_source_parent[1]
            else:
                try:
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
                except KeyError:
                    source_parent_entry = self.state._get_entry(self.source_index,
                                                                path_utf8=old_dirname)
                    source_parent_id = source_parent_entry[0][2]
                if source_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    source_parent_id = None
                else:
                    self.last_source_parent[0] = old_dirname
                    self.last_source_parent[1] = source_parent_id
            new_dirname = entry[0][0]
            if new_dirname == self.last_target_parent[0]:
                target_parent_id = self.last_target_parent[1]
            else:
                try:
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
                except KeyError:
                    # TODO: We don't always need to do the lookup, because the
                    #       parent entry will be the same as the source entry.
                    target_parent_entry = self.state._get_entry(self.target_index,
                                                                path_utf8=new_dirname)
                    if target_parent_entry == (None, None):
                        raise AssertionError(
                            "Could not find target parent in wt: %s\nparent of: %s"
                            % (new_dirname, entry))
                    target_parent_id = target_parent_entry[0][2]
                if target_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    target_parent_id = None
                else:
                    self.last_target_parent[0] = new_dirname
                    self.last_target_parent[1] = target_parent_id

            source_exec = source_details[3]
            if (self.include_unchanged
                or content_change
                or source_parent_id != target_parent_id
                or old_basename != entry[0][1]
                or source_exec != target_exec
                ):
                if old_path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                    old_path_u = self.utf8_decode(old_path)[0]
                    path_u = old_path_u
                else:
                    old_path_u = self.utf8_decode(old_path)[0]
                    if old_path == path:
                        path_u = old_path_u
                    else:
                        path_u = self.utf8_decode(path)[0]
                source_kind = DirState._minikind_to_kind[source_minikind]
                return (entry[0][2],
                       (old_path_u, path_u),
                       content_change,
                       (True, True),
                       (source_parent_id, target_parent_id),
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
                       (source_kind, target_kind),
                       (source_exec, target_exec))
            else:
                return self.uninteresting
        elif source_minikind in 'a' and target_minikind in 'fdlt':
            # looks like a new file
            path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            # TODO: these are the same for an entire directory: cache em.
            parent_id = self.state._get_entry(self.target_index,
                                              path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            if path_info is not None:
                # Present on disk:
                if self.use_filesystem_for_exec:
                    # We need S_ISREG here, because we aren't sure if this
                    # is a file or a directory.
                    target_exec = bool(
                        stat.S_ISREG(path_info[3].st_mode)
                        and stat.S_IEXEC & path_info[3].st_mode)
                else:
                    target_exec = target_details[3]
                return (entry[0][2],
                       (None, self.utf8_decode(path)[0]),
                       True,
                       (False, True),
                       (None, parent_id),
                       (None, self.utf8_decode(entry[0][1])[0]),
                       (None, path_info[2]),
                       (None, target_exec))
            else:
                # It's a missing file, report it as such.
                return (entry[0][2],
                       (None, self.utf8_decode(path)[0]),
                       False,
                       (False, True),
                       (None, parent_id),
                       (None, self.utf8_decode(entry[0][1])[0]),
                       (None, None),
                       (None, False))
        elif source_minikind in 'fdlt' and target_minikind in 'a':
            # unversioned, possibly, or possibly not deleted: we don't care.
            # if it's still on disk, *and* there's no other entry at this
            # path [we don't know this in this routine at the moment -
            # perhaps we should change this] - then it would be an unknown.
            old_path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            return (entry[0][2],
                   (self.utf8_decode(old_path)[0], None),
                   True,
                   (True, False),
                   (parent_id, None),
                   (self.utf8_decode(entry[0][1])[0], None),
                   (DirState._minikind_to_kind[source_minikind], None),
                   (source_details[3], None))
        elif source_minikind in 'fdlt' and target_minikind in 'r':
            # a rename; could be a true rename, or a rename inherited from
            # a renamed parent. TODO: handle this efficiently. It's not a
            # common case to rename dirs though, so a correct but slow
            # implementation will do.
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
                self.search_specific_files.add(target_details[1])
        elif source_minikind in 'ra' and target_minikind in 'ra':
            # neither of the selected trees contain this file,
            # so skip over it. This is not currently directly tested, but
            # is indirectly via test_too_much.TestCommands.test_conflicts.
            pass
        else:
            raise AssertionError("don't know how to compare "
                "source_minikind=%r, target_minikind=%r"
                % (source_minikind, target_minikind))
        ## import pdb;pdb.set_trace()
        return None

    def __iter__(self):
        return self

    def iter_changes(self):
        """Iterate over the changes."""
        utf8_decode = cache_utf8._utf8_decode
        _cmp_by_dirs = cmp_by_dirs
        _process_entry = self._process_entry
        uninteresting = self.uninteresting
        search_specific_files = self.search_specific_files
        searched_specific_files = self.searched_specific_files
        splitpath = osutils.splitpath
        # sketch:
        # compare source_index and target_index at or under each element of search_specific_files.
        # follow the following comparison table. Note that we only want to do diff operations when
        # the target is fdl because that's when the walkdirs logic will have exposed the pathinfo
        # for the target.
        # cases:
        #
        # Source | Target | disk | action
        #   r    | fdlt   |      | add source to search, add id path move and perform
        #        |        |      | diff check on source-target
        #   r    | fdlt   |  a   | dangling file that was present in the basis.
        #        |        |      | ???
        #   r    |  a     |      | add source to search
        #   r    |  a     |  a   |
        #   r    |  r     |      | this path is present in a non-examined tree, skip.
        #   r    |  r     |  a   | this path is present in a non-examined tree, skip.
        #   a    | fdlt   |      | add new id
        #   a    | fdlt   |  a   | dangling locally added file, skip
        #   a    |  a     |      | not present in either tree, skip
        #   a    |  a     |  a   | not present in any tree, skip
        #   a    |  r     |      | not present in either tree at this path, skip as it
        #        |        |      | may not be selected by the user's list of paths.
        #   a    |  r     |  a   | not present in either tree at this path, skip as it
        #        |        |      | may not be selected by the user's list of paths.
        #  fdlt  | fdlt   |      | content in both: diff them
        #  fdlt  | fdlt   |  a   | deleted locally, but not unversioned - show as deleted ?
        #  fdlt  |  a     |      | unversioned: output deleted id for now
        #  fdlt  |  a     |  a   | unversioned and deleted: output deleted id
        #  fdlt  |  r     |      | relocated in this tree, so add target to search.
        #        |        |      | Don't diff, we will see an r,fd; pair when we reach
        #        |        |      | this id at the other path.
        #  fdlt  |  r     |  a   | relocated in this tree, so add target to search.
        #        |        |      | Don't diff, we will see an r,fd; pair when we reach
        #        |        |      | this id at the other path.

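        # Informally, for the relocation rows above: when a file moves from
        # a/foo to b/foo, walking a/ sees source 'f' with target 'r' pointing
        # at b/foo, so b/foo is only added to search_specific_files and
        # nothing is emitted there; when b/foo is reached the source shows
        # 'r', _process_entry resolves the real source details at a/foo, and
        # the single rename record is produced at that point.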
        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
        #       keeping a cache of directories that we have seen.

        while search_specific_files:
            # TODO: the pending list should be lexically sorted?  the
            # interface doesn't require it.
            current_root = search_specific_files.pop()
            current_root_unicode = current_root.decode('utf8')
            searched_specific_files.add(current_root)
            # process the entries for this containing directory: the rest will be
            # found by their parents recursively.
            root_entries = self.state._entries_for_path(current_root)
            root_abspath = self.tree.abspath(current_root_unicode)
            try:
                root_stat = os.lstat(root_abspath)
            except OSError, e:
                if e.errno == errno.ENOENT:
                    # the path does not exist: let _process_entry know that.
                    root_dir_info = None
                else:
                    # some other random error: hand it up.
                    raise
            else:
                root_dir_info = ('', current_root,
                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
                    root_abspath)
                if root_dir_info[2] == 'directory':
                    if self.tree._directory_is_tree_reference(
                        current_root.decode('utf8')):
                        root_dir_info = root_dir_info[:2] + \
                            ('tree-reference',) + root_dir_info[3:]

            if not root_entries and not root_dir_info:
                # this specified path is not present at all, skip it.
                continue
            path_handled = False
            for entry in root_entries:
                result = _process_entry(entry, root_dir_info)
                if result is not None:
                    path_handled = True
                    if result is not uninteresting:
                        yield result
            if self.want_unversioned and not path_handled and root_dir_info:
                new_executable = bool(
                    stat.S_ISREG(root_dir_info[3].st_mode)
                    and stat.S_IEXEC & root_dir_info[3].st_mode)
                yield (None,
                       (None, current_root_unicode),
                       True,
                       (False, False),
                       (None, None),
                       (None, splitpath(current_root_unicode)[-1]),
                       (None, root_dir_info[2]),
                       (None, new_executable)
                      )
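            # Unversioned paths are reported with a None file_id and
            # (False, False) versioned flags; the unversioned yields in the
            # directory walk below use the same shape.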
            initial_key = (current_root, '', '')
            block_index, _ = self.state._find_block_index_from_key(initial_key)
            if block_index == 0:
                # we have processed the total root already, but because the
                # initial key matched it we should skip it here.
                block_index += 1
            if root_dir_info and root_dir_info[2] == 'tree-reference':
                current_dir_info = None
            else:
                dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)
                try:
                    current_dir_info = dir_iterator.next()
                except OSError, e:
                    # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
                    # python 2.5 has e.errno == EINVAL,
                    #            and e.winerror == ERROR_DIRECTORY
                    e_winerror = getattr(e, 'winerror', None)
                    win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
                    # there may be directories in the inventory even though
                    # this path is not a file on disk: so mark it as end of
                    # iterator
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                        current_dir_info = None
                    elif (sys.platform == 'win32'
                          and (e.errno in win_errors
                               or e_winerror in win_errors)):
                        current_dir_info = None
                    else:
                        raise
                else:
                    if current_dir_info[0][0] == '':
                        # remove .bzr from iteration
                        bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))
                        if current_dir_info[1][bzr_index][0] != '.bzr':
                            raise AssertionError()
                        del current_dir_info[1][bzr_index]
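            # Each item from _walkdirs_utf8 pairs a directory key with a list
            # of (relpath, basename, kind, lstat, abspath) tuples, i.e. the
            # same shape _process_entry expects for its path_info argument.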
            # walk until both the directory listing and the versioned metadata
            # are exhausted.
            if (block_index < len(self.state._dirblocks) and
                osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
                current_block = self.state._dirblocks[block_index]
            else:
                current_block = None
            while (current_dir_info is not None or
                   current_block is not None):
                if (current_dir_info and current_block
                    and current_dir_info[0][0] != current_block[0]):
                    if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:
                        # filesystem data refers to paths not covered by the dirblock.
                        # this has two possibilities:
                        # A) it is versioned but empty, so there is no block for it
                        # B) it is not versioned.

                        # if (A) then we need to recurse into it to check for
                        # new unknown files or directories.
                        # if (B) then we should ignore it, because we don't
                        # recurse into unknown directories.
                        path_index = 0
                        while path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if self.want_unversioned:
                                if current_path_info[2] == 'directory':
                                    if self.tree._directory_is_tree_reference(
                                        current_path_info[0].decode('utf8')):
                                        current_path_info = current_path_info[:2] + \
                                            ('tree-reference',) + current_path_info[3:]
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                yield (None,
                                    (None, utf8_decode(current_path_info[0])[0]),
                                    True,
                                    (False, False),
                                    (None, None),
                                    (None, utf8_decode(current_path_info[1])[0]),
                                    (None, current_path_info[2]),
                                    (None, new_executable))
                            # don't descend into this unversioned path if it is
                            # a dir
                            if current_path_info[2] in ('directory',
                                                        'tree-reference'):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                            path_index += 1

                        # This dir info has been handled, go to the next
                        try:
                            current_dir_info = dir_iterator.next()
                        except StopIteration:
                            current_dir_info = None
                    else:
                        # We have a dirblock entry for this location, but there
                        # is no filesystem path for this. This is most likely
                        # because a directory was removed from the disk.
                        # We don't have to report the missing directory,
                        # because that should have already been handled, but we
                        # need to handle all of the files that are contained
                        # within.
                        for current_entry in current_block[1]:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result = _process_entry(current_entry, None)
                            if result is not None:
                                if result is not uninteresting:
                                    yield result
                        block_index += 1
                        if (block_index < len(self.state._dirblocks) and
                            osutils.is_inside(current_root,
                                              self.state._dirblocks[block_index][0])):
                            current_block = self.state._dirblocks[block_index]
                        else:
                            current_block = None
                    continue
                entry_index = 0
                if current_block and entry_index < len(current_block[1]):
                    current_entry = current_block[1][entry_index]
                else:
                    current_entry = None
                advance_entry = True
                path_index = 0
                if current_dir_info and path_index < len(current_dir_info[1]):
                    current_path_info = current_dir_info[1][path_index]
                    if current_path_info[2] == 'directory':
                        if self.tree._directory_is_tree_reference(
                            current_path_info[0].decode('utf8')):
                            current_path_info = current_path_info[:2] + \
                                ('tree-reference',) + current_path_info[3:]
                else:
                    current_path_info = None
                advance_path = True
                path_handled = False
                while (current_entry is not None or
                       current_path_info is not None):
                    if current_entry is None:
                        # the check for path_handled when the path is advanced
                        # will yield this path if needed.
                        pass
                    elif current_path_info is None:
                        # no path is fine: the per entry code will handle it.
                        result = _process_entry(current_entry, current_path_info)
                        if result is not None:
                            if result is not uninteresting:
                                yield result
                    elif (current_entry[0][1] != current_path_info[1]
                          or current_entry[1][self.target_index][0] in 'ar'):
                        # The current path on disk doesn't match the dirblock
                        # record. Either the dirblock is marked as absent, or
                        # the file on disk is not present at all in the
                        # dirblock. Either way, report about the dirblock
                        # entry, and let other code handle the filesystem one.

                        # Compare the basename for these files to determine
                        # which comes first
                        if current_path_info[1] < current_entry[0][1]:
                            # extra file on disk: pass for now, but only
                            # increment the path, not the entry
                            advance_entry = False
                        else:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result = _process_entry(current_entry, None)
                            if result is not None:
                                if result is not uninteresting:
                                    yield result
                            advance_path = False
                    else:
                        result = _process_entry(current_entry, current_path_info)
                        if result is not None:
                            path_handled = True
                            if result is not uninteresting:
                                yield result
                    if advance_entry and current_entry is not None:
                        entry_index += 1
                        if entry_index < len(current_block[1]):
                            current_entry = current_block[1][entry_index]
                        else:
                            current_entry = None
                    else:
                        advance_entry = True  # reset the advance flag
                    if advance_path and current_path_info is not None:
                        if not path_handled:
                            # unversioned in all regards
                            if self.want_unversioned:
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                try:
                                    relpath_unicode = utf8_decode(current_path_info[0])[0]
                                except UnicodeDecodeError:
                                    raise errors.BadFilenameEncoding(
                                        current_path_info[0], osutils._fs_enc)
                                yield (None,
                                    (None, relpath_unicode),
                                    True,
                                    (False, False),
                                    (None, None),
                                    (None, utf8_decode(current_path_info[1])[0]),
                                    (None, current_path_info[2]),
                                    (None, new_executable))
                            # don't descend into this unversioned path if it is
                            # a dir
                            if current_path_info[2] in ('directory',):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                        # don't descend the disk iterator into any tree
                        # paths.
                        if current_path_info[2] == 'tree-reference':
                            del current_dir_info[1][path_index]
                            path_index -= 1
                        path_index += 1
                        if path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if current_path_info[2] == 'directory':
                                if self.tree._directory_is_tree_reference(
                                    current_path_info[0].decode('utf8')):
                                    current_path_info = current_path_info[:2] + \
                                        ('tree-reference',) + current_path_info[3:]
                        else:
                            current_path_info = None
                        path_handled = False
                    else:
                        advance_path = True  # reset the advance flag.
                if current_block is not None:
                    block_index += 1
                    if (block_index < len(self.state._dirblocks) and
                        osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
                        current_block = self.state._dirblocks[block_index]
                    else:
                        current_block = None
                if current_dir_info is not None:
                    try:
                        current_dir_info = dir_iterator.next()
                    except StopIteration:
                        current_dir_info = None
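# ProcessEntryPython is the default implementation; the compiled
# ProcessEntryC bound below replaces it when the C extension is available.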
_process_entry = ProcessEntryPython


# Try to load the compiled form if possible
try:
    from bzrlib._dirstate_helpers_c import (
        _read_dirblocks_c as _read_dirblocks,
        bisect_dirblock_c as bisect_dirblock,
        _bisect_path_left_c as _bisect_path_left,
        _bisect_path_right_c as _bisect_path_right,
        cmp_by_dirs_c as cmp_by_dirs,
        ProcessEntryC as _process_entry,
        update_entry as update_entry,
        )
except ImportError:
    from bzrlib._dirstate_helpers_py import (
        _read_dirblocks_py as _read_dirblocks,
        bisect_dirblock_py as bisect_dirblock,
        _bisect_path_left_py as _bisect_path_left,
        _bisect_path_right_py as _bisect_path_right,
        cmp_by_dirs_py as cmp_by_dirs,
        )
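# Either way, callers use these module-level names without needing to know
# which implementation was imported; for illustration only:
#
#   from bzrlib import dirstate
#   dirstate.cmp_by_dirs('a/b', 'a/c')  # same comparison API, C or Python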