         internal representation is of the format:
         (revid, plaintext)
+        # TODO: jam 20070209 The tests expect this to be returned as tuples,
+        # but the code itself doesn't really depend on that.
+        # Figure out a way to not require the overhead of turning the
+        # list back into tuples.
+        lines = [tuple(line.split(' ', 1)) for line in content]
-            origin, text = line.split(' ', 1)
-            lines.append((origin.decode('utf-8'), text))
         return KnitContent(lines)
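
An illustrative sketch (revision ids and text invented, not part of the change)
of the annotated fulltext encoding that parse_fulltext and lower_fulltext
round-trip, where each stored line is "revid SP text":

    # Each serialised line carries its origin revision before the text.
    serialised = ['rev-1 first line\n', 'rev-2 second line\n']
    # parse_fulltext keeps the split as (revid, plaintext) tuples.
    parsed = [tuple(line.split(' ', 1)) for line in serialised]
    assert parsed == [('rev-1', 'first line\n'), ('rev-2', 'second line\n')]
    # lower_fulltext inverts the representation back to the stored form.
    assert ['%s %s' % (o, t) for o, t in parsed] == serialised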

     def parse_line_delta_iter(self, lines):
+        return iter(self.parse_line_delta(lines))
-        for result_item in self.parse_line_delta[lines]:

+    def parse_line_delta(self, lines, version_id):
-    def parse_line_delta(self, lines, version):
         """Convert a line based delta into internal representation.

         line delta is in the form of:
         intstart intend intcount
         revid(utf8) newline\n
+        internal representation is
-        internal represnetation is
         (start, end, count, [1..count tuples (revid, newline)])

         lines = iter(lines)
         next = lines.next
+        def cache_and_return(line):
+            origin, text = line.split(' ', 1)
+            return cache.setdefault(origin, origin), text
         # walk through the lines parsing.
         for header in lines:
             start, end, count = [int(n) for n in header.split(',')]
+            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
-                origin, text = next().split(' ', 1)
-                contents.append((origin.decode('utf-8'), text))
             result.append((start, end, count, contents))
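
A hedged, self-contained sketch (revision ids and text invented) of the
line-delta encoding described in the docstring above: a "start,end,count"
header followed by count annotated lines:

    # One delta hunk: replace old lines [2:3) with two new annotated lines.
    delta_lines = iter(['2,3,2\n',
                        'rev-9 new line one\n',
                        'rev-9 new line two\n'])
    result = []
    for header in delta_lines:
        start, end, count = [int(n) for n in header.split(',')]
        contents = [tuple(next(delta_lines).split(' ', 1)) for _ in range(count)]
        result.append((start, end, count, contents))
    assert result == [(2, 3, 2, [('rev-9', 'new line one\n'),
                                 ('rev-9', 'new line two\n')])]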

+    def get_fulltext_content(self, lines):
+        """Extract just the content lines from a fulltext."""
+        return (line.split(' ', 1)[1] for line in lines)

+    def get_linedelta_content(self, lines):
+        """Extract just the content from a line delta.
+        This doesn't return all of the extra information stored in a delta.
+        Only the actual content lines.
+            header = header.split(',')
+            count = int(header[2])
+            for i in xrange(count):
+                origin, text = next().split(' ', 1)

     def lower_fulltext(self, content):
         """convert a fulltext content record into a serializable form.

         see parse_fulltext which this inverts.
+        # TODO: jam 20070209 We only do the caching thing to make sure that
+        # the origin is a valid utf-8 line, eventually we could remove it
+        return ['%s %s' % (o, t) for o, t in content._lines]
-        return ['%s %s' % (o.encode('utf-8'), t) for o, t in content._lines]

     def lower_line_delta(self, delta):
         """convert a delta into a serializable form.

         See parse_line_delta which this inverts.
+        # TODO: jam 20070209 We only do the caching thing to make sure that
+        # the origin is a valid utf-8 line, eventually we could remove it
         for start, end, c, lines in delta:
             out.append('%d,%d,%d\n' % (start, end, c))
+            out.extend(origin + ' ' + text
+                       for origin, text in lines)
-            for origin, text in lines:
-                out.append('%s %s' % (origin.encode('utf-8'), text))

     annotated = False

+    def parse_fulltext(self, content, version_id):
-    def parse_fulltext(self, content, version):
         """This parses an unannotated fulltext.

         Note that this is not a noop - the internal representation
         has (versionid, line) - it's just a constant versionid.
+        return self.make(content, version_id)
-        return self.make(content, version)

+    def parse_line_delta_iter(self, lines, version_id):
+        num_lines = len(lines)
+        while cur < num_lines:
-    def parse_line_delta_iter(self, lines, version):
-        header = lines.pop(0)
             start, end, c = [int(n) for n in header.split(',')]
+            yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

+    def parse_line_delta(self, lines, version_id):
+        return list(self.parse_line_delta_iter(lines, version_id))

+    def get_fulltext_content(self, lines):
+        """Extract just the content lines from a fulltext."""

+    def get_linedelta_content(self, lines):
+        """Extract just the content from a line delta.
+        This doesn't return all of the extra information stored in a delta.
+        Only the actual content lines.
+            header = header.split(',')
+            count = int(header[2])
+            for i in xrange(count):
-            yield start, end, c, zip([version] * c, lines[:c])
-    def parse_line_delta(self, lines, version):
-        return list(self.parse_line_delta_iter(lines, version))

     def lower_fulltext(self, content):
         return content.text()

     stored and retrieved.

+    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
+                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
+                 create=False, create_parent_dir=False, delay_create=False,
-    def __init__(self, relpath, transport, file_mode=None, access_mode=None, factory=None,
-                 basis_knit=None, delta=True, create=False):
         """Construct a knit at location specified by relpath.

         :param create: If not True, only open an existing knit.
+        :param create_parent_dir: If True, create the parent directory if
+            creating the file fails. (This is used for stores with
+            hash-prefixes that may not exist yet)
+        :param delay_create: The calling code is aware that the knit won't
+            actually be created until the first data is stored.
+        if deprecated_passed(basis_knit):
+            warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
+                          " deprecated as of bzr 0.9.",
+                          DeprecationWarning, stacklevel=2)
         if access_mode is None:
             access_mode = 'w'
         super(KnitVersionedFile, self).__init__(access_mode)
         assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
-        assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
         self.transport = transport
         self.filename = relpath
-        self.basis_knit = basis_knit
         self.factory = factory or KnitAnnotateFactory()
         self.writable = (access_mode == 'w')
         self.delta = delta
+        self._max_delta_chain = 200

         self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
+            access_mode, create=create, file_mode=file_mode,
+            create_parent_dir=create_parent_dir, delay_create=delay_create,
-            access_mode, create=create, file_mode=file_mode)
         self._data = _KnitData(transport, relpath + DATA_SUFFIX,
+            access_mode, create=create and not len(self), file_mode=file_mode,
+            create_parent_dir=create_parent_dir, delay_create=delay_create,
-            access_mode, create=create and not len(self), file_mode=file_mode)

     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__,
                            self.transport.abspath(self.filename))

+    def _check_should_delta(self, first_parents):
+        """Iterate back through the parent listing, looking for a fulltext.
+        This is used when we want to decide whether to add a delta or a new
+        fulltext. It searches for _max_delta_chain parents. When it finds a
+        fulltext parent, it sees if the total size of the deltas leading up to
+        it is large enough to indicate that we want a new full text anyway.
+        Return True if we should create a new delta, False if we should use a
+        delta_parents = first_parents
+        for count in xrange(self._max_delta_chain):
+            parent = delta_parents[0]
+            method = self._index.get_method(parent)
+            pos, size = self._index.get_position(parent)
+            if method == 'fulltext':
+            delta_parents = self._index.get_parents(parent)
+            # We couldn't find a fulltext, so we must create a new one
+        return fulltext_size > delta_size
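
An illustrative reading of the rule above, with invented sizes: walk back
through build-parents until a fulltext is found, sum the delta sizes along the
way, and keep adding deltas only while the chain is still cheaper than a fresh
fulltext:

    # Sketch of the comparison _check_should_delta ends with.
    fulltext_size = 1000              # size of the nearest fulltext ancestor
    delta_size = 120 + 80 + 50        # sizes of the deltas between us and it
    should_delta = fulltext_size > delta_size
    assert should_delta               # 1000 > 250: a delta is still worthwhile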

     def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
         """See VersionedFile._add_delta()."""
         self._check_add(version_id, []) # should we check the lines ?

         """See VersionedFile.copy_to()."""
         # copy the current index to a temp index to avoid racing with local
+        transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
+                self.transport.get(self._index._filename))
-        transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename),)
         # copy the data file
+        f = self._data._open_file()
+            transport.put_file(name + DATA_SUFFIX, f)
+        # move the copied index into place
+        transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)
-        transport.put(name + DATA_SUFFIX, self._data._open_file())
-        # rename the copied index into place
-        transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

     def create_empty(self, name, transport, mode=None):
+        return KnitVersionedFile(name, transport, factory=self.factory,
+                                 delta=self.delta, create=True)
-        return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)

+    def _fix_parents(self, version_id, new_parents):
-    def _fix_parents(self, version, new_parents):
         """Fix the parents list for version.

         This is done by appending a new version to the index

             diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
         return diff_hunks

+    def _get_components_positions(self, version_ids):
+        """Produce a map of position data for the components of versions.
+        This data is intended to be used for retrieving the knit records.
+        A dict of version_id to (method, data_pos, data_size, next) is
+        method is the way referenced data should be applied.
+        data_pos is the position of the data in the knit.
+        data_size is the size of the data in the knit.
+        next is the build-parent of the version, or None for fulltexts.
-    def _get_components(self, version_id):
-        """Return a list of (version_id, method, data) tuples that
-        makes up version specified by version_id of the knit.
-        The components should be applied in the order of the returned
-        The basis knit will be used to the largest extent possible
-        since it is assumed that accesses to it is faster.
+        for version_id in version_ids:
+            while cursor is not None and cursor not in component_data:
+                method = self._index.get_method(cursor)
+                if method == 'fulltext':
+                    next = self.get_parents(cursor)[0]
+                data_pos, data_size = self._index.get_position(cursor)
+                component_data[cursor] = (method, data_pos, data_size, next)
+        return component_data
-        # 4168 calls in 14912, 2289 internal
-        # 4168 in 9711 to read_records
-        # 52554 in 1250 to get_parents
-        # 170166 in 865 to list.append
-        # needed_revisions holds a list of (method, version_id) of
-        # versions that is needed to be fetched to construct the final
-        # version of the file.
-        # basis_revisions is a list of versions that needs to be
-        # fetched but exists in the basis knit.
-        basis = self.basis_knit
-            if basis and basis._index.has_version(cursor):
-                basis_versions.append(cursor)
-            method = picked_knit._index.get_method(cursor)
-            needed_versions.append((method, cursor))
-            if method == 'fulltext':
-            cursor = picked_knit.get_parents(cursor)[0]
-            for comp_id in basis_versions:
-                data_pos, data_size = basis._index.get_data_position(comp_id)
-                records.append((piece_id, data_pos, data_size))
-            components.update(basis._data.read_records(records))
-        for comp_id in [vid for method, vid in needed_versions
-                        if vid not in basis_versions]:
-            data_pos, data_size = self._index.get_position(comp_id)
-            records.append((comp_id, data_pos, data_size))
-        components.update(self._data.read_records(records))
-        # get_data_records returns a mapping with the version id as
-        # index and the value as data. The order the components need
-        # to be applied is held by needed_versions (reversed).
-        for method, comp_id in reversed(needed_versions):
-            out.append((comp_id, method, components[comp_id]))
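
For orientation, a sketch (invented values) of the shape of the map the new
_get_components_positions returns, keyed by version with the build-parent in
the last slot:

    # version_id -> (method, data_pos, data_size, next)
    component_data = {
        'rev-3': ('line-delta', 2048, 96, 'rev-2'),
        'rev-2': ('line-delta', 1024, 80, 'rev-1'),
        'rev-1': ('fulltext', 0, 512, None),   # fulltexts have no build-parent
    }
    method, data_pos, data_size, next_rev = component_data['rev-3']
    assert method == 'line-delta' and next_rev == 'rev-2'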

     def _get_content(self, version_id, parent_texts={}):
         """Returns a content object that makes up the specified
         if cached_version is not None:
             return cached_version
+        text_map, contents_map = self._get_content_maps([version_id])
+        return contents_map[version_id]
-        if self.basis_knit and version_id in self.basis_knit:
-            return self.basis_knit._get_content(version_id)
-        components = self._get_components(version_id)
-        for component_id, method, (data, digest) in components:
-            version_idx = self._index.lookup(component_id)
-            if method == 'fulltext':
-                assert content is None
-                content = self.factory.parse_fulltext(data, version_idx)
-            elif method == 'line-delta':
-                delta = self.factory.parse_line_delta(data, version_idx)
-                content._lines = self._apply_delta(content._lines, delta)
-        if 'no-eol' in self._index.get_options(version_id):
-            line = content._lines[-1][1].rstrip('\n')
-            content._lines[-1] = (content._lines[-1][0], line)
-        # digest here is the digest from the last applied component.
-        if sha_strings(content.text()) != digest:
-            import pdb;pdb.set_trace()
-            raise KnitCorrupt(self.filename, 'sha-1 does not match %s' % version_id)

     def _check_versions_present(self, version_ids):
         """Check that all specified versions are present."""
+        self._index.check_versions_present(version_ids)
-        version_ids = set(version_ids)
-        for r in list(version_ids):
-            if self._index.has_version(r):
-                version_ids.remove(r)
-        raise RevisionNotPresent(list(version_ids)[0], self.filename)

     def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
         """See VersionedFile.add_lines_with_ghosts()."""

     def _clone_text(self, new_version_id, old_version_id, parents):
         """See VersionedFile.clone_text()."""
+        # FIXME RBC 20060228 make fast by only inserting an index with null
-        # FIXME RBC 20060228 make fast by only inserting an index with null delta.
         self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

     def get_lines(self, version_id):
         """See VersionedFile.get_lines()."""
+        return self.get_line_list([version_id])[0]

+    def _get_record_map(self, version_ids):
+        """Produce a dictionary of knit records.
+        The keys are version_ids, the values are tuples of (method, content,
+        method is the way the content should be applied.
+        content is a KnitContent object.
+        digest is the SHA1 digest of this version id after all steps are done
+        next is the build-parent of the version, i.e. the leftmost ancestor.
+        If the method is fulltext, next will be None.

+        position_map = self._get_components_positions(version_ids)
+        # c = component_id, m = method, p = position, s = size, n = next
+        records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]
+        for component_id, content, digest in \
+                self._data.read_records_iter(records):
+            method, position, size, next = position_map[component_id]
+            record_map[component_id] = method, content, digest, next

+    def get_text(self, version_id):
+        """See VersionedFile.get_text"""
+        return self.get_texts([version_id])[0]

+    def get_texts(self, version_ids):
+        return [''.join(l) for l in self.get_line_list(version_ids)]

+    def get_line_list(self, version_ids):
+        """Return the texts of listed versions as a list of strings."""
+        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
+        for version_id in version_ids:
+            self.check_not_reserved_id(version_id)
+        text_map, content_map = self._get_content_maps(version_ids)
+        return [text_map[v] for v in version_ids]
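
A small usage-style sketch (invented text) of how these helpers relate:
get_line_list returns lists of lines and get_texts simply joins them, which is
also all get_text([...])[0] relies on:

    line_list = [['hello\n', 'world\n']]      # one entry per requested version
    texts = [''.join(l) for l in line_list]   # what get_texts builds from it
    assert texts == ['hello\nworld\n']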

+    def _get_content_maps(self, version_ids):
+        """Produce maps of text and KnitContents
+        :return: (text_map, content_map) where text_map contains the texts for
+            the requested versions and content_map contains the KnitContents.
+            Both dicts take version_ids as their keys.

+        for version_id in version_ids:
+            if not self.has_version(version_id):
+                raise RevisionNotPresent(version_id, self.filename)
+        record_map = self._get_record_map(version_ids)
+        for version_id in version_ids:
+            while cursor is not None:
+                method, data, digest, next = record_map[cursor]
+                components.append((cursor, method, data, digest))
+                if cursor in content_map:
+            for component_id, method, data, digest in reversed(components):
+                if component_id in content_map:
+                    content = content_map[component_id]
+                    if method == 'fulltext':
+                        assert content is None
+                        content = self.factory.parse_fulltext(data, version_id)
+                    elif method == 'line-delta':
+                        delta = self.factory.parse_line_delta(data, version_id)
+                        content = content.copy()
+                        content._lines = self._apply_delta(content._lines,
+                    content_map[component_id] = content
+            if 'no-eol' in self._index.get_options(version_id):
+                content = content.copy()
+                line = content._lines[-1][1].rstrip('\n')
+                content._lines[-1] = (content._lines[-1][0], line)
+            final_content[version_id] = content
+            # digest here is the digest from the last applied component.
+            text = content.text()
+            if sha_strings(text) != digest:
+                raise KnitCorrupt(self.filename,
+                    'sha-1 does not match %s' % version_id)
+            text_map[version_id] = text
+        return text_map, final_content
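
An illustrative sketch (invented lines) of the 'no-eol' fixup performed above,
which strips the trailing newline from the last stored line when the version
was recorded without a final end-of-line:

    lines = [('rev-1', 'first\n'), ('rev-1', 'last\n')]
    origin, text = lines[-1]
    lines[-1] = (origin, text.rstrip('\n'))
    assert lines[-1] == ('rev-1', 'last')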

+    def iter_lines_added_or_present_in_versions(self, version_ids=None,
-        return self._get_content(version_id).text()

-    def iter_lines_added_or_present_in_versions(self, version_ids=None):
         """See VersionedFile.iter_lines_added_or_present_in_versions()."""
         if version_ids is None:
             version_ids = self.versions()
+        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
+        pb = progress.DummyProgress()
+        # we don't care about inclusions, the caller cares.
-        # we dont care about inclusions, the caller cares.
         # but we need to setup a list of records to visit.
         # we need version_id, position, length
         version_id_records = []
+        requested_versions = set(version_ids)
-        requested_versions = list(version_ids)
         # filter for available versions
         for version_id in requested_versions:
             if not self.has_version(version_id):
                 raise RevisionNotPresent(version_id, self.filename)
         # get an in-component-order queue:
         for version_id in self.versions():
             if version_id in requested_versions:
-                version_ids.append(version_id)
                 data_pos, length = self._index.get_position(version_id)
                 version_id_records.append((version_id, data_pos, length))

-        pb = bzrlib.ui.ui_factory.nested_progress_bar()
         total = len(version_id_records)
+        for version_idx, (version_id, data, sha_value) in \
+            enumerate(self._data.read_records_iter(version_id_records)):
+            pb.update('Walking content.', version_idx, total)
+            method = self._index.get_method(version_id)
+            assert method in ('fulltext', 'line-delta')
+            if method == 'fulltext':
+                line_iterator = self.factory.get_fulltext_content(data)
+                line_iterator = self.factory.get_linedelta_content(data)
+            for line in line_iterator:
+        pb.update('Walking content.', total, total)
-            pb.update('Walking content.', count, total)
-            for version_id, data, sha_value in \
-                self._data.read_records_iter(version_id_records):
-                pb.update('Walking content.', count, total)
-                method = self._index.get_method(version_id)
-                version_idx = self._index.lookup(version_id)
-                assert method in ('fulltext', 'line-delta')
-                if method == 'fulltext':
-                    content = self.factory.parse_fulltext(data, version_idx)
-                    for line in content.text():
-                    delta = self.factory.parse_line_delta(data, version_idx)
-                    for start, end, count, lines in delta:
-                        for origin, line in lines:
-            pb.update('Walking content.', total, total)
-        pb.update('Walking content.', total, total)

     def num_versions(self):
         """See VersionedFile.num_versions()."""

 class _KnitComponentFile(object):
     """One of the files used to implement a knit database"""

+    def __init__(self, transport, filename, mode, file_mode=None,
+                 create_parent_dir=False, dir_mode=None):
-    def __init__(self, transport, filename, mode, file_mode=None):
         self._transport = transport
         self._filename = filename
         self._mode = mode
+        self._file_mode = file_mode
+        self._dir_mode = dir_mode
+        self._create_parent_dir = create_parent_dir
+        self._need_to_create = False
-        self._file_mode=file_mode

+    def _full_path(self):
+        """Return the full path to this file."""
+        return self._transport.base + self._filename

-    def write_header(self):
-        if self._transport.append(self._filename, StringIO(self.HEADER),
-                mode=self._file_mode):
-            raise KnitCorrupt(self._filename, 'misaligned after writing header')

     def check_header(self, fp):
         line = fp.readline()
+            # An empty file can actually be treated as though the file doesn't
+            raise errors.NoSuchFile(self._full_path())
         if line != self.HEADER:
+            raise KnitHeaderError(badline=line,
+                filename=self._transport.abspath(self._filename))
-            raise KnitHeaderError(badline=line)

     def commit(self):
         """Commit is a nop."""

             self._history.append(version_id)
             index = self._cache[version_id][5]
+            self._cache[version_id] = (version_id,
-            self._cache[version_id] = (version_id,

+    def __init__(self, transport, filename, mode, create=False, file_mode=None,
+                 create_parent_dir=False, delay_create=False, dir_mode=None):
+        _KnitComponentFile.__init__(self, transport, filename, mode,
+                                    file_mode=file_mode,
+                                    create_parent_dir=create_parent_dir,
-    def __init__(self, transport, filename, mode, create=False, file_mode=None):
-        _KnitComponentFile.__init__(self, transport, filename, mode, file_mode)
         self._cache = {}
         # position in _history is the 'official' index for a revision
         # but the values may have come from a newer entry.
+        # so - wc -l of a knit index is != the number of unique names
-        # so - wc -l of a knit index is != the number of uniqe names
         self._history = []
-        pb = bzrlib.ui.ui_factory.nested_progress_bar()
+            fp = self._transport.get(self._filename)
+                # _load_data may raise NoSuchFile if the target knit is
+            if mode != 'w' or not create:
+                self._need_to_create = True
+                self._transport.put_bytes_non_atomic(
+                    self._filename, self.HEADER, mode=self._file_mode)

+    def _load_data(self, fp):
+        history = self._history
+        self.check_header(fp)
+        # readlines reads the whole file at once:
+        # bad for transports like http, good for local disk
+        # we save 60 ms doing this one change (
+        # from calling readline each time to calling
+        # probably what we want for nice behaviour on
+        # http is an incremental readlines that yields, or
+        # a check for local vs non local indexes,
+        history_top = len(history) - 1
+        for line in fp.readlines():
+            if len(rec) < 5 or rec[-1] != ':':
+                # FIXME: in the future we should determine if it's a
+                # short write - and ignore it
+                # or a different failure, and raise. RBC 20060407
+            for value in rec[4:-1]:
+                    # uncompressed reference
+                    parent_id = value[1:]
-            pb.update('read knit index', count, total)
-            fp = self._transport.get(self._filename)
-            self.check_header(fp)
-            # readlines reads the whole file at once:
-            # bad for transports like http, good for local disk
-            # we save 60 ms doing this one change (
-            # from calling readline each time to calling
-            # probably what we want for nice behaviour on
-            # http is a incremental readlines that yields, or
-            # a check for local vs non local indexes,
-            for l in fp.readlines():
-                if len(rec) < 5 or rec[-1] != ':':
-                    # FIXME: in the future we should determine if its a
-                    # short write - and ignore it
-                    # or a different failure, and raise. RBC 20060407
-                #pb.update('read knit index', count, total)
-                # See self._parse_parents
-                for value in rec[4:-1]:
-                        # uncompressed reference
-                        parents.append(value[1:])
-                        # this is 15/4000ms faster than isinstance,
-                        # this function is called thousands of times a
-                        # second so small variations add up.
-                        assert value.__class__ is str
-                        parents.append(self._history[int(value)])
-                # end self._parse_parents
-                # self._cache_version(rec[0],
-                #                     rec[1].split(','),
-                # --- self._cache_version
-                # only want the _history index to reference the 1st
-                # index entry for version_id
-                if version_id not in self._cache:
-                    index = len(self._history)
-                    self._history.append(version_id)
+                    parent_id = history[int(value)]
+                parents.append(parent_id)
+            except (IndexError, ValueError), e:
+                # The parent could not be decoded to get its parent row. This
+                # at a minimum will cause this row to have wrong parents, or
+                # even to apply a delta to the wrong base and decode
+                # incorrectly. it's therefore not usable, and because we have
+                # encountered a situation where a new knit index had this
+                # corrupt we can't assume that no other rows referring to the
+                # index of this record actually mean the subsequent uncorrupt
+                raise errors.KnitCorrupt(self._filename,
+                    "line %r: %s" % (rec, e))

+            version_id, options, pos, size = rec[:4]
+            version_id = version_id

+            # See self._cache_version
+            # only want the _history index to reference the 1st
+            # index entry for version_id
+            if version_id not in cache:
+                history.append(version_id)
-                    index = self._cache[version_id][5]
-                self._cache[version_id] = (version_id,
-                # --- self._cache_version
-        except NoSuchFile, e:
-            if mode != 'w' or not create:
-            pb.update('read knit index', total, total)

-    def _parse_parents(self, compressed_parents):
-        """convert a list of string parent values into version ids.
-        ints are looked up in the index.
-        .FOO values are ghosts and converted in to FOO.
-        NOTE: the function is retained here for clarity, and for possible
-        use in partial index reads. However bulk processing now has
-        it inlined in __init__ for inner-loop optimisation.
-        for value in compressed_parents:
-            if value[-1] == '.':
-                # uncompressed reference
-                result.append(value[1:])
+                index = cache[version_id][5]
+            cache[version_id] = (version_id,
+            # end self._cache_version
-                # this is 15/4000ms faster than isinstance,
-                # this function is called thousands of times a
-                # second so small variations add up.
-                assert value.__class__ is str
-                result.append(self._history[int(value)])
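
A hedged sketch of the index line format both loops above parse (values
invented): "version_id options pos size parents :", where each parent is
either a '.'-prefixed revision id (possibly a ghost) or an integer reference
into the list of versions already seen:

    history = ['rev-1']
    line = 'rev-2 line-delta 1234 88 0 .ghost-rev :\n'
    rec = line.split()
    assert len(rec) >= 5 and rec[-1] == ':'
    version_id, options, pos, size = rec[:4]
    parents = []
    for value in rec[4:-1]:
        if value.startswith('.'):
            parents.append(value[1:])            # literal (possibly ghost) id
        else:
            parents.append(history[int(value)])  # compressed row reference
    assert parents == ['rev-1', 'ghost-rev']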

     def get_graph(self):
+        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
-        for version_id, index in self._cache.iteritems():
-            graph.append((version_id, index[4]))

+    def get_ancestry(self, versions, topo_sorted=True):
-    def get_ancestry(self, versions):
         """See VersionedFile.get_ancestry."""
         # get a graph of all the mentioned versions:
         pending = set(versions)
             version = pending.pop()
-            parents = self._cache[version][4]
-            # got the parents ok
+                parents = [p for p in cache[version][4] if p in cache]
+                raise RevisionNotPresent(version, self._filename)
+            # if not completed and not a ghost
+            pending.update([p for p in parents if p not in graph])
-            parents = [parent for parent in parents if parent in self._cache]
-            for parent in parents:
-                # if not completed and not a ghost
-                if parent not in graph:
             graph[version] = parents
         return topo_sort(graph.items())
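
An illustrative, self-contained sketch (invented revision ids) of the ancestry
walk above, using a plain dict in place of the index cache (whose slot 4 holds
the parents list) and leaving out topo_sort itself:

    parents_of = {'C': ['B'], 'B': ['A'], 'A': [], 'D': ['ghost']}
    graph = {}
    pending = set(['C', 'D'])
    while pending:
        version = pending.pop()
        parents = [p for p in parents_of[version] if p in parents_of]
        pending.update([p for p in parents if p not in graph])
        graph[version] = parents
    assert graph['D'] == []                       # the ghost parent is trimmed
    assert set(graph) == set(['A', 'B', 'C', 'D'])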

     def get_ancestry_with_ghosts(self, versions):
         """See VersionedFile.get_ancestry_with_ghosts."""
         # get a graph of all the mentioned versions:
+        self.check_versions_present(versions)
         pending = set(versions)
             version = pending.pop()
+                parents = cache[version][4]
-                parents = self._cache[version][4]
             except KeyError:
                 # ghost, fake it
                 graph[version] = []
+                pending.update([p for p in parents if p not in graph])
-                # got the parents ok
-                for parent in parents:
-                    if parent not in graph:
                 graph[version] = parents
         return topo_sort(graph.items())

         (version_id, options, pos, size, parents).
+        orig_history = self._history[:]
+        orig_cache = self._cache.copy()

+            for version_id, options, pos, size, parents in versions:
+                line = "\n%s %s %s %s %s :" % (version_id,
+                    self._version_list_to_index(parents))
+                assert isinstance(line, str), \
+                    'content must be utf-8 encoded: %r' % (line,)
+                self._cache_version(version_id, options, pos, size, parents)
+            if not self._need_to_create:
+                self._transport.append_bytes(self._filename, ''.join(lines))
+                sio.write(self.HEADER)
+                sio.writelines(lines)
+                self._transport.put_file_non_atomic(self._filename, sio,
+                    create_parent_dir=self._create_parent_dir,
+                    mode=self._file_mode,
+                    dir_mode=self._dir_mode)
+                self._need_to_create = False
+            # If any problems happen, restore the original values and re-raise
+            self._history = orig_history
+            self._cache = orig_cache
-        for version_id, options, pos, size, parents in versions:
-            line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),
-                self._version_list_to_index(parents))
-            assert isinstance(line, str), \
-                'content must be utf-8 encoded: %r' % (line,)
-        self._transport.append(self._filename, StringIO(''.join(lines)))
-        # cache after writing, so that a failed write leads to missing cache
-        # entries not extra ones. XXX TODO: RBC 20060502 in the event of a
-        # failure, reload the index or flush it or some such, to prevent
-        # writing records that did complete twice.
-        for version_id, options, pos, size, parents in versions:
-            self._cache_version(version_id, options, pos, size, parents)

     def has_version(self, version_id):
         """True if the version is in the index."""
+        return version_id in self._cache
-        return self._cache.has_key(version_id)

     def get_position(self, version_id):
         """Return data position and size of specified version."""
+        entry = self._cache[version_id]
+        return entry[2], entry[3]
-        return (self._cache[version_id][2], \
-                self._cache[version_id][3])

     def get_method(self, version_id):
         """Return compression method of specified version."""

                 if parent in self._cache]

     def get_parents_with_ghosts(self, version_id):
+        """Return parents of specified version with ghosts."""
-        """Return parents of specified version wth ghosts."""
         return self._cache[version_id][4]

     def check_versions_present(self, version_ids):
         """Check that all specified versions are present."""
+        for version_id in version_ids:
+            if version_id not in cache:
+                raise RevisionNotPresent(version_id, self._filename)
-        version_ids = set(version_ids)
-        for version_id in list(version_ids):
-            if version_id in self._cache:
-                version_ids.remove(version_id)
-        raise RevisionNotPresent(list(version_ids)[0], self.filename)

 class _KnitData(_KnitComponentFile):
     """Contents of the knit data file"""

+    def __init__(self, transport, filename, mode, create=False, file_mode=None,
+                 create_parent_dir=False, delay_create=False,
+        _KnitComponentFile.__init__(self, transport, filename, mode,
+                                    file_mode=file_mode,
+                                    create_parent_dir=create_parent_dir,
-    HEADER = "# bzr knit data 8\n"
-    def __init__(self, transport, filename, mode, create=False, file_mode=None):
-        _KnitComponentFile.__init__(self, transport, filename, mode)
         self._checked = False
+        # TODO: jam 20060713 conceptually, this could spill to disk
+        # if the cached size gets larger than a certain amount
+        # but it complicates the model a bit, so for now just use
+        # a simple dictionary
+        self._do_cache = False
+            self._need_to_create = create
+                self._transport.put_bytes_non_atomic(self._filename, '',
+                    mode=self._file_mode)

+    def enable_cache(self):
+        """Enable caching of reads."""
+        self._do_cache = True
-            self._transport.put(self._filename, StringIO(''), mode=file_mode)

     def clear_cache(self):
         """Clear the record cache."""
+        self._do_cache = False

     def _open_file(self):
+            return self._transport.get(self._filename)
-        if self._file is None:
-                self._file = self._transport.get(self._filename)

     def _record_to_data(self, version_id, digest, lines):
         """Convert version_id, digest, lines into a raw data block.

         :return: the offset in the data file raw_data was written.
         assert isinstance(raw_data, str), 'data must be plain bytes'
+        if not self._need_to_create:
+            return self._transport.append_bytes(self._filename, raw_data)
+            self._transport.put_bytes_non_atomic(self._filename, raw_data,
+                create_parent_dir=self._create_parent_dir,
+                mode=self._file_mode,
+                dir_mode=self._dir_mode)
+            self._need_to_create = False
-        return self._transport.append(self._filename, StringIO(raw_data))

     def add_record(self, version_id, digest, lines):
         """Write new text record to disk. Returns the position in the
         file where it was written."""
         size, sio = self._record_to_data(version_id, digest, lines)
-        self._records[version_id] = (digest, lines)
         # write to disk
+        if not self._need_to_create:
+            start_pos = self._transport.append_file(self._filename, sio)
+            self._transport.put_file_non_atomic(self._filename, sio,
+                create_parent_dir=self._create_parent_dir,
+                mode=self._file_mode,
+                dir_mode=self._dir_mode)
+            self._need_to_create = False
+            self._cache[version_id] = sio.getvalue()
-        start_pos = self._transport.append(self._filename, sio)
         return start_pos, size

     def _parse_record_header(self, version_id, raw_data):
         as (stream, header_record)
         df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
+            rec = self._check_header(version_id, df.readline())
+        except Exception, e:
+            raise KnitCorrupt(self._filename,
+                "While reading {%s} got %s(%s)"
+                % (version_id, e.__class__.__name__, str(e)))
-        rec = df.readline().split()
-            raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')
-        if rec[1].decode('utf-8')!= version_id:
-            raise KnitCorrupt(self._filename,
-                'unexpected version, wanted %r, got %r' % (
-                version_id, rec[1]))

+    def _check_header(self, version_id, line):
+            raise KnitCorrupt(self._filename,
+                'unexpected number of elements in record header')
+        if rec[1] != version_id:
+            raise KnitCorrupt(self._filename,
+                'unexpected version, wanted %r, got %r'
+                % (version_id, rec[1]))

     def _parse_record(self, version_id, data):
         # profiling notes:
         # 4168 calls in 2880 217 internal
         # 4168 calls to _parse_record_header in 2121
         # 4168 calls to readlines in 330
+        df = GzipFile(mode='rb', fileobj=StringIO(data))
+            record_contents = df.readlines()
+        except Exception, e:
+            raise KnitCorrupt(self._filename,
+                "While reading {%s} got %s(%s)"
+                % (version_id, e.__class__.__name__, str(e)))
+        header = record_contents.pop(0)
+        rec = self._check_header(version_id, header)
+        last_line = record_contents.pop()
+        if len(record_contents) != int(rec[2]):
+            raise KnitCorrupt(self._filename,
+                'incorrect number of lines %s != %s'
+                % (len(record_contents), int(rec[2]),
+        if last_line != 'end %s\n' % rec[1]:
+            raise KnitCorrupt(self._filename,
+                'unexpected version end line %r, wanted %r'
+                % (last_line, version_id))
-        df, rec = self._parse_record_header(version_id, data)
-        record_contents = df.readlines()
-        l = record_contents.pop()
-        assert len(record_contents) == int(rec[2])
-        if l.decode('utf-8') != 'end %s\n' % version_id:
-            raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
         return record_contents, rec[3]
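
A hedged, runnable sketch (Python 2 style, invented version id and digest) of
the gzipped record framing these methods read back: a "version <id> <numlines>
<digest>" header line, the content lines, then an "end <id>" trailer:

    from cStringIO import StringIO
    from gzip import GzipFile

    sio = StringIO()
    writer = GzipFile(mode='wb', fileobj=sio)
    writer.writelines(['version rev-1 2 fakesha1\n',
                       'rev-1 line one\n',
                       'rev-1 line two\n',
                       'end rev-1\n'])
    writer.close()

    reader = GzipFile(mode='rb', fileobj=StringIO(sio.getvalue()))
    rec = reader.readline().split()
    assert rec[1] == 'rev-1' and int(rec[2]) == 2   # rec[3] is the digest
    contents = reader.readlines()
    assert contents.pop() == 'end rev-1\n'
    assert len(contents) == int(rec[2])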

         This unpacks enough of the text record to validate the id is
         as expected but that's all.
-        It will actively recompress currently cached records on the
-        basis that that is cheaper than I/O activity.
-        for version_id, pos, size in records:
-            if version_id not in self._records:
-                needed_records.append((version_id, pos, size))
         # setup an iterator of the external records:
         # uses readv so nice and fast we hope.
-        if len(needed_records):
             # grab the disk data needed.
+            # Don't check _cache if it is empty
+            needed_offsets = [(pos, size) for version_id, pos, size
+                              if version_id not in self._cache]
+            needed_offsets = [(pos, size) for version_id, pos, size
+        raw_records = self._transport.readv(self._filename, needed_offsets)
-            raw_records = self._transport.readv(self._filename,
-                [(pos, size) for version_id, pos, size in needed_records])

         for version_id, pos, size in records:
+            if version_id in self._cache:
+                # This data has already been validated
+                data = self._cache[version_id]
-            if version_id in self._records:
-                # compress a new version
-                size, sio = self._record_to_data(version_id,
-                    self._records[version_id][0],
-                    self._records[version_id][1])
-                yield version_id, sio.getvalue()
                 pos, data = raw_records.next()
+                self._cache[version_id] = data
                 # validate the header
                 df, rec = self._parse_record_header(version_id, data)
+                yield version_id, data
-                yield version_id, data

     def read_records_iter(self, records):
         """Read text records from data file and yield result.
+        The result will be returned in whatever is the fastest to read.
+        Not by the order requested. Also, multiple requests for the same
+        record will only yield 1 response.
+        :param records: A list of (version_id, pos, len) entries
+        :return: Yields (version_id, contents, digest) in the order
+            read, not the order requested
-        Each passed record is a tuple of (version_id, pos, len) and
-        will be read in the given order. Yields (version_id,

+            # Skip records we have already seen
+            yielded_records = set()
+            needed_records = set()
+            for record in records:
+                if record[0] in self._cache:
+                    if record[0] in yielded_records:
+                    yielded_records.add(record[0])
+                    data = self._cache[record[0]]
+                    content, digest = self._parse_record(record[0], data)
+                    yield (record[0], content, digest)
+                    needed_records.add(record)
+            needed_records = sorted(needed_records, key=operator.itemgetter(1))
+            needed_records = sorted(set(records), key=operator.itemgetter(1))

+        if not needed_records:
+        # The transport optimizes the fetching as well
+        # (ie, reads continuous ranges.)
+        readv_response = self._transport.readv(self._filename,
+            [(pos, size) for version_id, pos, size in needed_records])
+        for (version_id, pos, size), (pos, data) in \
+                izip(iter(needed_records), readv_response):
+            content, digest = self._parse_record(version_id, data)
+                self._cache[version_id] = data
+            yield version_id, content, digest
-        # 60890 calls for 4168 extractions in 5045, 683 internal.
-        # 4168 calls to readv in 1411
-        # 4168 calls to parse_record in 2880
-        for version_id, pos, size in records:
-            if version_id not in self._records:
-                needed_records.append((version_id, pos, size))
-        if len(needed_records):
-            # We take it that the transport optimizes the fetching as good
-            # as possible (ie, reads continous ranges.)
-            response = self._transport.readv(self._filename,
-                [(pos, size) for version_id, pos, size in needed_records])
-            for (record_id, pos, size), (pos, data) in izip(iter(needed_records), response):
-                content, digest = self._parse_record(record_id, data)
-                self._records[record_id] = (digest, content)
-        for version_id, pos, size in records:
-            yield version_id, list(self._records[version_id][1]), self._records[version_id][0]
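
A small sketch (invented offsets) of the request handling described above:
duplicate requests collapse into one, and reads are issued in file-offset
order rather than in the order asked for:

    import operator
    records = [('rev-2', 120, 40), ('rev-1', 0, 80), ('rev-2', 120, 40)]
    needed = sorted(set(records), key=operator.itemgetter(1))
    assert needed == [('rev-1', 0, 80), ('rev-2', 120, 40)]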

     def read_records(self, records):
         """Read records into a dictionary."""
         components = {}
+        for record_id, content, digest in \
+                self.read_records_iter(records):
-        for record_id, content, digest in self.read_records_iter(records):
             components[record_id] = (content, digest)
         return components