~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Aaron Bentley
Date: 2007-03-07 19:33:08 UTC
mfrom: (2324 +trunk)
mto: (2323.6.9 0.15-integration)
mto: This revision was merged to the branch mainline in revision 2330.
Revision ID: abentley@panoramicfeedback.com-20070307193308-0d4e2jx8m14w5n99

Merge bzr.dev

files added:
COPYING.txt

bzr.ico

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/cache_utf8.py

bzrlib/cmd_version_info.py

bzrlib/debug.py

bzrlib/dirstate.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/help_topics.py

bzrlib/ignores.py

bzrlib/inspect_for_copy.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/memorytree.py

bzrlib/merge_directive.py

bzrlib/mutabletree.py

bzrlib/patches.py

bzrlib/plugins/launchpad/lp_indirect.py

bzrlib/plugins/launchpad/test_lp_indirect.py

bzrlib/registry.py

bzrlib/repofmt

bzrlib/repofmt/__init__.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/revisiontree.py

bzrlib/tag.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HttpServer.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_last_revision_info.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_tags.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_extract.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_subsume.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_timestamp.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_file_mtime.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_list_files.py

bzrlib/tests/tree_implementations/test_revision_tree.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_add_reference.py

bzrlib/tests/workingtree_implementations/test_basis_tree.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_flush.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_nested_specifics.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_paths2ids.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_read_working_inventory.py

bzrlib/tests/workingtree_implementations/test_readonly.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_revision_tree.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_smart_add.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_walkdirs.py

bzrlib/timestamp.py

bzrlib/transport/chroot.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/treebuilder.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/version.py

bzrlib/version_info_formats

bzrlib/version_info_formats/__init__.py

bzrlib/version_info_formats/format_python.py

bzrlib/version_info_formats/format_rio.py

bzrlib/workingtree_4.py

bzrlib/xml6.py

bzrlib/xml7.py

doc/bazaar-vcs.org.kid

doc/centralized_workflow.txt

doc/default.css

doc/http_smart_server.txt

doc/index.txt

doc/server.txt

doc/version_info.txt

profile_imports.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/info.txt

tools/win32/ostools.py

tools/win32/start_bzr.bat

files removed:
bzrlib/util/configobj/validate.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

patience-test.py

files renamed:
bzrlib/tests/test_revprops.py => bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/test_command.py => bzrlib/tests/test_commands.py

bzrlib/win32console.py => bzrlib/win32utils.py

files modified:
.bzrignore

BRANCH.TODO

HACKING

Makefile

NEWS

README

TODO

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc/__init__.py

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util/configobj/configobj.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib/newinventory.py

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

setup.py *

tools/convertfile.py

tools/convertinv.py

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

# Written by Martin Pool.

# Modified by Johan Rydberg <jrydberg@gnu.org>

# Modified by Robert Collins <robert.collins@canonical.com>

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

from cStringIO import StringIO

import difflib

from itertools import izip, chain

import operator

import os

import sys

import warnings

import bzrlib

import bzrlib.errors as errors

from bzrlib.errors import FileExists, NoSuchFile, KnitError, \

InvalidRevisionId, KnitCorrupt, KnitHeaderError, \

RevisionNotPresent, RevisionAlreadyPresent

from bzrlib.tuned_gzip import *

from bzrlib import (

cache_utf8,

errors,

osutils,

patiencediff,

progress,

ui,

)

from bzrlib.errors import (

FileExists,

NoSuchFile,

KnitError,

InvalidRevisionId,

KnitCorrupt,

KnitHeaderError,

RevisionNotPresent,

RevisionAlreadyPresent,

)

from bzrlib.tuned_gzip import GzipFile

from bzrlib.trace import mutter

from bzrlib.osutils import contains_whitespace, contains_linebreaks, \

sha_strings

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

from bzrlib.osutils import (

contains_whitespace,

contains_linebreaks,

sha_strings,

)

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

from bzrlib.tsort import topo_sort

100

import bzrlib.ui

101

import bzrlib.weave

102

from bzrlib.versionedfile import VersionedFile, InterVersionedFile

103

104

105

# TODO: Split out code specific to this format into an associated object.

107

# TODO: Can we put in some kind of value to check that the index and data

108

# files belong together?

109

# TODO: accomodate binaries, perhaps by storing a byte count

110

# TODO: accommodate binaries, perhaps by storing a byte count

111

112

# TODO: function to check whole file

113

109

127

110

128

def annotate_iter(self):

111

129

"""Yield tuples of (origin, text) for each content line."""

112

for origin, text in self._lines:

113

yield origin, text

130

return iter(self._lines)

114

131

115

132

def annotate(self):

116

133

"""Return a list of (origin, text) tuples."""

118

135

119

136

def line_delta_iter(self, new_lines):

120

137

"""Generate line-based delta from this content to new_lines."""

121

new_texts = [text for origin, text in new_lines._lines]

122

old_texts = [text for origin, text in self._lines]

123

s = SequenceMatcher(None, old_texts, new_texts)

124

for op in s.get_opcodes():

125

if op[0] == 'equal':

138

new_texts = new_lines.text()

139

old_texts = self.text()

140

s = KnitSequenceMatcher(None, old_texts, new_texts)

141

for tag, i1, i2, j1, j2 in s.get_opcodes():

142

if tag == 'equal':

126

143

continue

127

# ofrom oto length data

128

yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

144

# ofrom, oto, length, data

145

yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

129

146

130

147

def line_delta(self, new_lines):

131

148

return list(self.line_delta_iter(new_lines))

133

150

def text(self):

134

151

return [text for origin, text in self._lines]

135

152

153

def copy(self):

154

return KnitContent(self._lines[:])

155

136

156

137

157

class _KnitFactory(object):

138

158

"""Base factory for creating content objects."""

139

159

140

def make(self, lines, version):

160

def make(self, lines, version_id):

141

161

num_lines = len(lines)

142

return KnitContent(zip([version] * num_lines, lines))

162

return KnitContent(zip([version_id] * num_lines, lines))

143

163

144

164

145

165

class KnitAnnotateFactory(_KnitFactory):

147

167

148

168

annotated = True

149

169

150

def parse_fulltext(self, content, version):

170

def parse_fulltext(self, content, version_id):

151

171

"""Convert fulltext to internal representation

152

172

153

173

fulltext content is of the format

155

175

internal representation is of the format:

156

176

(revid, plaintext)

157

177

"""

158

lines = []

159

for line in content:

160

origin, text = line.split(' ', 1)

161

lines.append((origin.decode('utf-8'), text))

178

# TODO: jam 20070209 The tests expect this to be returned as tuples,

179

# but the code itself doesn't really depend on that.

180

# Figure out a way to not require the overhead of turning the

181

# list back into tuples.

182

lines = [tuple(line.split(' ', 1)) for line in content]

162

183

return KnitContent(lines)

163

184

164

185

def parse_line_delta_iter(self, lines):

165

for result_item in self.parse_line_delta[lines]:

166

yield result_item

186

return iter(self.parse_line_delta(lines))

167

187

168

def parse_line_delta(self, lines, version):

188

def parse_line_delta(self, lines, version_id):

169

189

"""Convert a line based delta into internal representation.

170

190

171

191

line delta is in the form of:

172

192

intstart intend intcount

173

193

1..count lines:

174

194

revid(utf8) newline\n

175

internal represnetation is

195

internal representation is

176

196

(start, end, count, [1..count tuples (revid, newline)])

177

197

"""

178

198

result = []

179

199

lines = iter(lines)

180

200

next = lines.next

201

202

cache = {}

203

def cache_and_return(line):

204

origin, text = line.split(' ', 1)

205

return cache.setdefault(origin, origin), text

206

181

207

# walk through the lines parsing.

182

208

for header in lines:

183

209

start, end, count = [int(n) for n in header.split(',')]

184

contents = []

185

remaining = count

186

while remaining:

210

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

211

result.append((start, end, count, contents))

212

return result

213

214

def get_fulltext_content(self, lines):

215

"""Extract just the content lines from a fulltext."""

216

return (line.split(' ', 1)[1] for line in lines)

217

218

def get_linedelta_content(self, lines):

219

"""Extract just the content from a line delta.

220

221

This doesn't return all of the extra information stored in a delta.

222

Only the actual content lines.

223

"""

224

lines = iter(lines)

225

next = lines.next

226

for header in lines:

227

header = header.split(',')

228

count = int(header[2])

229

for i in xrange(count):

187

230

origin, text = next().split(' ', 1)

188

remaining -= 1

189

contents.append((origin.decode('utf-8'), text))

190

result.append((start, end, count, contents))

191

return result

231

yield text

192

232

193

233

def lower_fulltext(self, content):

194

234

"""convert a fulltext content record into a serializable form.

195

235

196

236

see parse_fulltext which this inverts.

197

237

"""

198

return ['%s %s' % (o.encode('utf-8'), t) for o, t in content._lines]

238

# TODO: jam 20070209 We only do the caching thing to make sure that

239

# the origin is a valid utf-8 line, eventually we could remove it

240

return ['%s %s' % (o, t) for o, t in content._lines]

199

241

200

242

def lower_line_delta(self, delta):

201

243

"""convert a delta into a serializable form.

202

244

203

245

See parse_line_delta which this inverts.

204

246

"""

247

# TODO: jam 20070209 We only do the caching thing to make sure that

248

# the origin is a valid utf-8 line, eventually we could remove it

205

249

out = []

206

250

for start, end, c, lines in delta:

207

251

out.append('%d,%d,%d\n' % (start, end, c))

208

for origin, text in lines:

209

out.append('%s %s' % (origin.encode('utf-8'), text))

252

out.extend(origin + ' ' + text

253

for origin, text in lines)

210

254

return out

211

255

212

256

215

259

216

260

annotated = False

217

261

218

def parse_fulltext(self, content, version):

262

def parse_fulltext(self, content, version_id):

219

263

"""This parses an unannotated fulltext.

220

264

221

265

Note that this is not a noop - the internal representation

222

266

has (versionid, line) - its just a constant versionid.

223

267

"""

224

return self.make(content, version)

268

return self.make(content, version_id)

225

269

226

def parse_line_delta_iter(self, lines, version):

227

while lines:

228

header = lines.pop(0)

270

def parse_line_delta_iter(self, lines, version_id):

271

cur = 0

272

num_lines = len(lines)

273

while cur < num_lines:

274

header = lines[cur]

275

cur += 1

229

276

start, end, c = [int(n) for n in header.split(',')]

230

yield start, end, c, zip([version] * c, lines[:c])

231

del lines[:c]

232

233

def parse_line_delta(self, lines, version):

234

return list(self.parse_line_delta_iter(lines, version))

235

277

yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

278

cur += c

279

280

def parse_line_delta(self, lines, version_id):

281

return list(self.parse_line_delta_iter(lines, version_id))

282

283

def get_fulltext_content(self, lines):

284

"""Extract just the content lines from a fulltext."""

285

return iter(lines)

286

287

def get_linedelta_content(self, lines):

288

"""Extract just the content from a line delta.

289

290

This doesn't return all of the extra information stored in a delta.

291

Only the actual content lines.

292

"""

293

lines = iter(lines)

294

next = lines.next

295

for header in lines:

296

header = header.split(',')

297

count = int(header[2])

298

for i in xrange(count):

299

yield next()

300

236

301

def lower_fulltext(self, content):

237

302

return content.text()

238

303

265

330

stored and retrieved.

266

331

"""

267

332

268

def __init__(self, relpath, transport, file_mode=None, access_mode=None, factory=None,

269

basis_knit=None, delta=True, create=False):

333

def __init__(self, relpath, transport, file_mode=None, access_mode=None,

334

factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,

335

create=False, create_parent_dir=False, delay_create=False,

336

dir_mode=None):

270

337

"""Construct a knit at location specified by relpath.

271

338

272

339

:param create: If not True, only open an existing knit.

340

:param create_parent_dir: If True, create the parent directory if

341

creating the file fails. (This is used for stores with

342

hash-prefixes that may not exist yet)

343

:param delay_create: The calling code is aware that the knit won't

344

actually be created until the first data is stored.

273

345

"""

346

if deprecated_passed(basis_knit):

347

warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"

348

" deprecated as of bzr 0.9.",

349

DeprecationWarning, stacklevel=2)

274

350

if access_mode is None:

275

351

access_mode = 'w'

276

352

super(KnitVersionedFile, self).__init__(access_mode)

277

353

assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode

278

assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \

279

type(basis_knit)

280

281

354

self.transport = transport

282

355

self.filename = relpath

283

self.basis_knit = basis_knit

284

356

self.factory = factory or KnitAnnotateFactory()

285

357

self.writable = (access_mode == 'w')

286

358

self.delta = delta

287

359

360

self._max_delta_chain = 200

361

288

362

self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,

289

access_mode, create=create, file_mode=file_mode)

363

access_mode, create=create, file_mode=file_mode,

364

create_parent_dir=create_parent_dir, delay_create=delay_create,

365

dir_mode=dir_mode)

290

366

self._data = _KnitData(transport, relpath + DATA_SUFFIX,

291

access_mode, create=create and not len(self), file_mode=file_mode)

367

access_mode, create=create and not len(self), file_mode=file_mode,

368

create_parent_dir=create_parent_dir, delay_create=delay_create,

369

dir_mode=dir_mode)

292

370

293

371

def __repr__(self):

294

372

return '%s(%s)' % (self.__class__.__name__,

295

373

self.transport.abspath(self.filename))

296

374

375

def _check_should_delta(self, first_parents):

376

"""Iterate back through the parent listing, looking for a fulltext.

377

378

This is used when we want to decide whether to add a delta or a new

379

fulltext. It searches for _max_delta_chain parents. When it finds a

380

fulltext parent, it sees if the total size of the deltas leading up to

381

it is large enough to indicate that we want a new full text anyway.

382

383

Return True if we should create a new delta, False if we should use a

384

full text.

385

"""

386

delta_size = 0

387

fulltext_size = None

388

delta_parents = first_parents

389

for count in xrange(self._max_delta_chain):

390

parent = delta_parents[0]

391

method = self._index.get_method(parent)

392

pos, size = self._index.get_position(parent)

393

if method == 'fulltext':

394

fulltext_size = size

395

break

396

delta_size += size

397

delta_parents = self._index.get_parents(parent)

398

else:

399

# We couldn't find a fulltext, so we must create a new one

400

return False

401

402

return fulltext_size > delta_size

403

297

404

def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):

298

405

"""See VersionedFile._add_delta()."""

299

406

self._check_add(version_id, []) # should we check the lines ?

331

438

# To speed the extract of texts the delta chain is limited

332

439

# to a fixed number of deltas. This should minimize both

333

440

# I/O and the time spend applying deltas.

334

count = 0

335

delta_parents = [delta_parent]

336

while count < 25:

337

parent = delta_parents[0]

338

method = self._index.get_method(parent)

339

if method == 'fulltext':

340

break

341

delta_parents = self._index.get_parents(parent)

342

count = count + 1

343

if method == 'line-delta':

344

# did not find a fulltext in the delta limit.

345

# just do a normal insertion.

441

# The window was changed to a maximum of 200 deltas, but also added

442

# was a check that the total compressed size of the deltas is

443

# smaller than the compressed size of the fulltext.

444

if not self._check_should_delta([delta_parent]):

445

# We don't want a delta here, just do a normal insertion.

346

446

return super(KnitVersionedFile, self)._add_delta(version_id,

347

447

parents,

348

448

delta_parent,

362

462

:param records: A list of tuples(version_id, options, parents, size).

363

463

:param data: The data for the records. When it is written, the records

364

464

are adjusted to have pos pointing into data by the sum of

365

the preceeding records sizes.

465

the preceding records sizes.

366

466

"""

367

467

# write all the data

368

468

pos = self._data.add_raw_record(data)

469

offset = 0

369

470

index_entries = []

370

471

for (version_id, options, parents, size) in records:

371

index_entries.append((version_id, options, pos, size, parents))

372

pos += size

472

index_entries.append((version_id, options, pos+offset,

473

size, parents))

474

if self._data._do_cache:

475

self._data._cache[version_id] = data[offset:offset+size]

476

offset += size

373

477

self._index.add_versions(index_entries)

374

478

479

def enable_cache(self):

480

"""Start caching data for this knit"""

481

self._data.enable_cache()

482

375

483

def clear_cache(self):

376

484

"""Clear the data cache only."""

377

485

self._data.clear_cache()

380

488

"""See VersionedFile.copy_to()."""

381

489

# copy the current index to a temp index to avoid racing with local

382

490

# writes

383

transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename),)

491

transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',

492

self.transport.get(self._index._filename))

384

493

# copy the data file

385

transport.put(name + DATA_SUFFIX, self._data._open_file())

386

# rename the copied index into place

387

transport.rename(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

494

f = self._data._open_file()

495

try:

496

transport.put_file(name + DATA_SUFFIX, f)

497

finally:

498

f.close()

499

# move the copied index into place

500

transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

388

501

389

502

def create_empty(self, name, transport, mode=None):

390

return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)

503

return KnitVersionedFile(name, transport, factory=self.factory,

504

delta=self.delta, create=True)

391

505

392

def _fix_parents(self, version, new_parents):

506

def _fix_parents(self, version_id, new_parents):

393

507

"""Fix the parents list for version.

394

508

395

509

This is done by appending a new version to the index

397

511

the parents list must be a superset of the current

398

512

list.

399

513

"""

400

current_values = self._index._cache[version]

514

current_values = self._index._cache[version_id]

401

515

assert set(current_values[4]).difference(set(new_parents)) == set()

402

self._index.add_version(version,

516

self._index.add_version(version_id,

403

517

current_values[1],

404

518

current_values[2],

405

519

current_values[3],

407

521

408

522

def get_delta(self, version_id):

409

523

"""Get a delta for constructing version from some other version."""

524

version_id = osutils.safe_revision_id(version_id)

525

self.check_not_reserved_id(version_id)

410

526

if not self.has_version(version_id):

411

527

raise RevisionNotPresent(version_id, self.filename)

412

528

417

533

parent = None

418

534

data_pos, data_size = self._index.get_position(version_id)

419

535

data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]

420

version_idx = self._index.lookup(version_id)

421

536

noeol = 'no-eol' in self._index.get_options(version_id)

422

537

if 'fulltext' == self._index.get_method(version_id):

423

new_content = self.factory.parse_fulltext(data, version_idx)

538

new_content = self.factory.parse_fulltext(data, version_id)

424

539

if parent is not None:

425

540

reference_content = self._get_content(parent)

426

541

old_texts = reference_content.text()

427

542

else:

428

543

old_texts = []

429

544

new_texts = new_content.text()

430

delta_seq = SequenceMatcher(None, old_texts, new_texts)

545

delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)

431

546

return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)

432

547

else:

433

delta = self.factory.parse_line_delta(data, version_idx)

548

delta = self.factory.parse_line_delta(data, version_id)

434

549

return parent, sha1, noeol, delta

435

550

436

551

def get_graph_with_ghosts(self):

440

555

441

556

def get_sha1(self, version_id):

442

557

"""See VersionedFile.get_sha1()."""

443

components = self._get_components(version_id)

444

return components[-1][-1][-1]

558

version_id = osutils.safe_revision_id(version_id)

559

record_map = self._get_record_map([version_id])

560

method, content, digest, next = record_map[version_id]

561

return digest

445

562

446

563

@staticmethod

447

564

def get_suffixes():

450

567

451

568

def has_ghost(self, version_id):

452

569

"""True if there is a ghost reference in the file to version_id."""

570

version_id = osutils.safe_revision_id(version_id)

453

571

# maybe we have it

454

572

if self.has_version(version_id):

455

573

return False

468

586

469

587

def has_version(self, version_id):

470

588

"""See VersionedFile.has_version."""

589

version_id = osutils.safe_revision_id(version_id)

471

590

return self._index.has_version(version_id)

472

591

473

592

__contains__ = has_version

481

600

delta_seq = None

482

601

for parent_id in parents:

483

602

merge_content = self._get_content(parent_id, parent_texts)

484

seq = SequenceMatcher(None, merge_content.text(), content.text())

603

seq = patiencediff.PatienceSequenceMatcher(

604

None, merge_content.text(), content.text())

485

605

if delta_seq is None:

486

606

# setup a delta seq to reuse.

487

607

delta_seq = seq

498

618

reference_content = self._get_content(parents[0], parent_texts)

499

619

new_texts = content.text()

500

620

old_texts = reference_content.text()

501

delta_seq = SequenceMatcher(None, old_texts, new_texts)

621

delta_seq = patiencediff.PatienceSequenceMatcher(

622

None, old_texts, new_texts)

502

623

return self._make_line_delta(delta_seq, content)

503

624

504

625

def _make_line_delta(self, delta_seq, new_content):

510

631

diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

511

632

return diff_hunks

512

633

513

def _get_components(self, version_id):

514

"""Return a list of (version_id, method, data) tuples that

515

makes up version specified by version_id of the knit.

516

517

The components should be applied in the order of the returned

518

list.

519

520

The basis knit will be used to the largest extent possible

521

since it is assumed that accesses to it is faster.

634

def _get_components_positions(self, version_ids):

635

"""Produce a map of position data for the components of versions.

636

637

This data is intended to be used for retrieving the knit records.

638

639

A dict of version_id to (method, data_pos, data_size, next) is

640

returned.

641

method is the way referenced data should be applied.

642

data_pos is the position of the data in the knit.

643

data_size is the size of the data in the knit.

644

next is the build-parent of the version, or None for fulltexts.

522

645

"""

523

#profile notes:

524

# 4168 calls in 14912, 2289 internal

525

# 4168 in 9711 to read_records

526

# 52554 in 1250 to get_parents

527

# 170166 in 865 to list.append

528

529

# needed_revisions holds a list of (method, version_id) of

530

# versions that is needed to be fetched to construct the final

531

# version of the file.

532

533

# basis_revisions is a list of versions that needs to be

534

# fetched but exists in the basis knit.

535

536

basis = self.basis_knit

537

needed_versions = []

538

basis_versions = []

539

cursor = version_id

540

541

while 1:

542

picked_knit = self

543

if basis and basis._index.has_version(cursor):

544

picked_knit = basis

545

basis_versions.append(cursor)

546

method = picked_knit._index.get_method(cursor)

547

needed_versions.append((method, cursor))

548

if method == 'fulltext':

549

break

550

cursor = picked_knit.get_parents(cursor)[0]

551

552

components = {}

553

if basis_versions:

554

records = []

555

for comp_id in basis_versions:

556

data_pos, data_size = basis._index.get_data_position(comp_id)

557

records.append((piece_id, data_pos, data_size))

558

components.update(basis._data.read_records(records))

559

560

records = []

561

for comp_id in [vid for method, vid in needed_versions

562

if vid not in basis_versions]:

563

data_pos, data_size = self._index.get_position(comp_id)

564

records.append((comp_id, data_pos, data_size))

565

components.update(self._data.read_records(records))

566

567

# get_data_records returns a mapping with the version id as

568

# index and the value as data. The order the components need

569

# to be applied is held by needed_versions (reversed).

570

out = []

571

for method, comp_id in reversed(needed_versions):

572

out.append((comp_id, method, components[comp_id]))

573

574

return out

575

646

component_data = {}

647

for version_id in version_ids:

648

cursor = version_id

649

650

while cursor is not None and cursor not in component_data:

651

method = self._index.get_method(cursor)

652

if method == 'fulltext':

653

next = None

654

else:

655

next = self.get_parents(cursor)[0]

656

data_pos, data_size = self._index.get_position(cursor)

657

component_data[cursor] = (method, data_pos, data_size, next)

658

cursor = next

659

return component_data

660

576

661

def _get_content(self, version_id, parent_texts={}):

577

662

"""Returns a content object that makes up the specified

578

663

version."""

583

668

if cached_version is not None:

584

669

return cached_version

585

670

586

if self.basis_knit and version_id in self.basis_knit:

587

return self.basis_knit._get_content(version_id)

588

589

content = None

590

components = self._get_components(version_id)

591

for component_id, method, (data, digest) in components:

592

version_idx = self._index.lookup(component_id)

593

if method == 'fulltext':

594

assert content is None

595

content = self.factory.parse_fulltext(data, version_idx)

596

elif method == 'line-delta':

597

delta = self.factory.parse_line_delta(data, version_idx)

598

content._lines = self._apply_delta(content._lines, delta)

599

600

if 'no-eol' in self._index.get_options(version_id):

601

line = content._lines[-1][1].rstrip('\n')

602

content._lines[-1] = (content._lines[-1][0], line)

603

604

# digest here is the digest from the last applied component.

605

if sha_strings(content.text()) != digest:

606

import pdb;pdb.set_trace()

607

raise KnitCorrupt(self.filename, 'sha-1 does not match %s' % version_id)

608

609

return content

671

text_map, contents_map = self._get_content_maps([version_id])

672

return contents_map[version_id]

610

673

611

674

def _check_versions_present(self, version_ids):

612

675

"""Check that all specified versions are present."""

613

version_ids = set(version_ids)

614

for r in list(version_ids):

615

if self._index.has_version(r):

616

version_ids.remove(r)

617

if version_ids:

618

raise RevisionNotPresent(list(version_ids)[0], self.filename)

676

self._index.check_versions_present(version_ids)

619

677

620

678

def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):

621

679

"""See VersionedFile.add_lines_with_ghosts()."""

634

692

### FIXME escape. RBC 20060228

635

693

if contains_whitespace(version_id):

636

694

raise InvalidRevisionId(version_id, self.filename)

695

self.check_not_reserved_id(version_id)

637

696

if self.has_version(version_id):

638

697

raise RevisionAlreadyPresent(version_id, self.filename)

639

698

self._check_lines_not_unicode(lines)

683

742

# To speed the extract of texts the delta chain is limited

684

743

# to a fixed number of deltas. This should minimize both

685

744

# I/O and the time spend applying deltas.

686

count = 0

687

delta_parents = present_parents

688

while count < 25:

689

parent = delta_parents[0]

690

method = self._index.get_method(parent)

691

if method == 'fulltext':

692

break

693

delta_parents = self._index.get_parents(parent)

694

count = count + 1

695

if method == 'line-delta':

696

delta = False

745

delta = self._check_should_delta(present_parents)

697

746

747

assert isinstance(version_id, str)

698

748

lines = self.factory.make(lines, version_id)

699

749

if delta or (self.factory.annotated and len(present_parents) > 0):

700

750

# Merge annotations from parent texts if so is needed.

717

767

718

768

def _clone_text(self, new_version_id, old_version_id, parents):

719

769

"""See VersionedFile.clone_text()."""

720

# FIXME RBC 20060228 make fast by only inserting an index with null delta.

770

# FIXME RBC 20060228 make fast by only inserting an index with null

771

# delta.

721

772

self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

722

773

723

774

def get_lines(self, version_id):

724

775

"""See VersionedFile.get_lines()."""

725

return self._get_content(version_id).text()

726

727

def iter_lines_added_or_present_in_versions(self, version_ids=None):

776

return self.get_line_list([version_id])[0]

777

778

def _get_record_map(self, version_ids):

779

"""Produce a dictionary of knit records.

780

781

The keys are version_ids, the values are tuples of (method, content,

782

digest, next).

783

method is the way the content should be applied.

784

content is a KnitContent object.

785

digest is the SHA1 digest of this version id after all steps are done

786

next is the build-parent of the version, i.e. the leftmost ancestor.

787

If the method is fulltext, next will be None.

788

"""

789

position_map = self._get_components_positions(version_ids)

790

# c = component_id, m = method, p = position, s = size, n = next

791

records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]

792

record_map = {}

793

for component_id, content, digest in \

794

self._data.read_records_iter(records):

795

method, position, size, next = position_map[component_id]

796

record_map[component_id] = method, content, digest, next

797

798

return record_map

799

800

def get_text(self, version_id):

801

"""See VersionedFile.get_text"""

802

return self.get_texts([version_id])[0]

803

804

def get_texts(self, version_ids):

805

return [''.join(l) for l in self.get_line_list(version_ids)]

806

807

def get_line_list(self, version_ids):

808

"""Return the texts of listed versions as a list of strings."""

809

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

810

for version_id in version_ids:

811

self.check_not_reserved_id(version_id)

812

text_map, content_map = self._get_content_maps(version_ids)

813

return [text_map[v] for v in version_ids]

814

815

def _get_content_maps(self, version_ids):

816

"""Produce maps of text and KnitContents

817

818

:return: (text_map, content_map) where text_map contains the texts for

819

the requested versions and content_map contains the KnitContents.

820

Both dicts take version_ids as their keys.

821

"""

822

for version_id in version_ids:

823

if not self.has_version(version_id):

824

raise RevisionNotPresent(version_id, self.filename)

825

record_map = self._get_record_map(version_ids)

826

827

text_map = {}

828

content_map = {}

829

final_content = {}

830

for version_id in version_ids:

831

components = []

832

cursor = version_id

833

while cursor is not None:

834

method, data, digest, next = record_map[cursor]

835

components.append((cursor, method, data, digest))

836

if cursor in content_map:

837

break

838

cursor = next

839

840

content = None

841

for component_id, method, data, digest in reversed(components):

842

if component_id in content_map:

843

content = content_map[component_id]

844

else:

845

if method == 'fulltext':

846

assert content is None

847

content = self.factory.parse_fulltext(data, version_id)

848

elif method == 'line-delta':

849

delta = self.factory.parse_line_delta(data, version_id)

850

content = content.copy()

851

content._lines = self._apply_delta(content._lines,

852

delta)

853

content_map[component_id] = content

854

855

if 'no-eol' in self._index.get_options(version_id):

856

content = content.copy()

857

line = content._lines[-1][1].rstrip('\n')

858

content._lines[-1] = (content._lines[-1][0], line)

859

final_content[version_id] = content

860

861

# digest here is the digest from the last applied component.

862

text = content.text()

863

if sha_strings(text) != digest:

864

raise KnitCorrupt(self.filename,

865

'sha-1 does not match %s' % version_id)

866

867

text_map[version_id] = text

868

return text_map, final_content

869

870

def iter_lines_added_or_present_in_versions(self, version_ids=None,

871

pb=None):

728

872

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

729

873

if version_ids is None:

730

874

version_ids = self.versions()

731

# we dont care about inclusions, the caller cares.

875

else:

876

version_ids = [osutils.safe_revision_id(v) for v in version_ids]

877

if pb is None:

878

pb = progress.DummyProgress()

879

# we don't care about inclusions, the caller cares.

732

880

# but we need to setup a list of records to visit.

733

881

# we need version_id, position, length

734

882

version_id_records = []

735

requested_versions = list(version_ids)

883

requested_versions = set(version_ids)

736

884

# filter for available versions

737

885

for version_id in requested_versions:

738

886

if not self.has_version(version_id):

739

887

raise RevisionNotPresent(version_id, self.filename)

740

888

# get a in-component-order queue:

741

version_ids = []

742

889

for version_id in self.versions():

743

890

if version_id in requested_versions:

744

version_ids.append(version_id)

745

891

data_pos, length = self._index.get_position(version_id)

746

892

version_id_records.append((version_id, data_pos, length))

747

893

748

pb = bzrlib.ui.ui_factory.nested_progress_bar()

749

count = 0

750

894

total = len(version_id_records)

751

try:

752

pb.update('Walking content.', count, total)

753

for version_id, data, sha_value in \

754

self._data.read_records_iter(version_id_records):

755

pb.update('Walking content.', count, total)

756

method = self._index.get_method(version_id)

757

version_idx = self._index.lookup(version_id)

758

assert method in ('fulltext', 'line-delta')

759

if method == 'fulltext':

760

content = self.factory.parse_fulltext(data, version_idx)

761

for line in content.text():

762

yield line

763

else:

764

delta = self.factory.parse_line_delta(data, version_idx)

765

for start, end, count, lines in delta:

766

for origin, line in lines:

767

yield line

768

count +=1

769

pb.update('Walking content.', total, total)

770

pb.finished()

771

except:

772

pb.update('Walking content.', total, total)

773

pb.finished()

774

raise

895

for version_idx, (version_id, data, sha_value) in \

896

enumerate(self._data.read_records_iter(version_id_records)):

897

pb.update('Walking content.', version_idx, total)

898

method = self._index.get_method(version_id)

899

900

assert method in ('fulltext', 'line-delta')

901

if method == 'fulltext':

902

line_iterator = self.factory.get_fulltext_content(data)

903

else:

904

line_iterator = self.factory.get_linedelta_content(data)

905

for line in line_iterator:

906

yield line

907

908

pb.update('Walking content.', total, total)

775

909

776

910

def num_versions(self):

777

911

"""See VersionedFile.num_versions()."""

781

915

782

916

def annotate_iter(self, version_id):

783

917

"""See VersionedFile.annotate_iter."""

918

version_id = osutils.safe_revision_id(version_id)

784

919

content = self._get_content(version_id)

785

920

for origin, text in content.annotate_iter():

786

921

yield origin, text

790

925

# perf notes:

791

926

# optimism counts!

792

927

# 52554 calls in 1264 872 internal down from 3674

928

version_id = osutils.safe_revision_id(version_id)

793

929

try:

794

930

return self._index.get_parents(version_id)

795

931

except KeyError:

797

933

798

934

def get_parents_with_ghosts(self, version_id):

799

935

"""See VersionedFile.get_parents."""

936

version_id = osutils.safe_revision_id(version_id)

800

937

try:

801

938

return self._index.get_parents_with_ghosts(version_id)

802

939

except KeyError:

808

945

versions = [versions]

809

946

if not versions:

810

947

return []

811

self._check_versions_present(versions)

948

versions = [osutils.safe_revision_id(v) for v in versions]

812

949

return self._index.get_ancestry(versions)

813

950

814

951

def get_ancestry_with_ghosts(self, versions):

817

954

versions = [versions]

818

955

if not versions:

819

956

return []

820

self._check_versions_present(versions)

957

versions = [osutils.safe_revision_id(v) for v in versions]

821

958

return self._index.get_ancestry_with_ghosts(versions)

822

959

823

960

#@deprecated_method(zero_eight)

843

980

844

981

def plan_merge(self, ver_a, ver_b):

845

982

"""See VersionedFile.plan_merge."""

983

ver_a = osutils.safe_revision_id(ver_a)

984

ver_b = osutils.safe_revision_id(ver_b)

846

985

ancestors_b = set(self.get_ancestry(ver_b))

847

986

def status_a(revision, text):

848

987

if revision in ancestors_b:

861

1000

annotated_b = self.annotate(ver_b)

862

1001

plain_a = [t for (a, t) in annotated_a]

863

1002

plain_b = [t for (a, t) in annotated_b]

864

blocks = SequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

1003

blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()

865

1004

a_cur = 0

866

1005

b_cur = 0

867

1006

for ai, bi, l in blocks:

884

1023

class _KnitComponentFile(object):

885

1024

"""One of the files used to implement a knit database"""

886

1025

887

def __init__(self, transport, filename, mode, file_mode=None):

1026

def __init__(self, transport, filename, mode, file_mode=None,

1027

create_parent_dir=False, dir_mode=None):

888

1028

self._transport = transport

889

1029

self._filename = filename

890

1030

self._mode = mode

891

self._file_mode=file_mode

1031

self._file_mode = file_mode

1032

self._dir_mode = dir_mode

1033

self._create_parent_dir = create_parent_dir

1034

self._need_to_create = False

892

1035

893

def write_header(self):

894

if self._transport.append(self._filename, StringIO(self.HEADER),

895

mode=self._file_mode):

896

raise KnitCorrupt(self._filename, 'misaligned after writing header')

1036

def _full_path(self):

1037

"""Return the full path to this file."""

1038

return self._transport.base + self._filename

897

1039

898

1040

def check_header(self, fp):

899

1041

line = fp.readline()

1042

if line == '':

1043

# An empty file can actually be treated as though the file doesn't

1044

# exist yet.

1045

raise errors.NoSuchFile(self._full_path())

900

1046

if line != self.HEADER:

901

raise KnitHeaderError(badline=line)

1047

raise KnitHeaderError(badline=line,

1048

filename=self._transport.abspath(self._filename))

902

1049

903

1050

def commit(self):

904

1051

"""Commit is a nop."""

932

1079

933

1080

The index file on disc contains a header, followed by one line per knit

934

1081

record. The same revision can be present in an index file more than once.

935

The first occurence gets assigned a sequence number starting from 0.

1082

The first occurrence gets assigned a sequence number starting from 0.

936

1083

937

1084

The format of a single line is

938

1085

REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n

949

1096

The ' :' marker is the end of record marker.

950

1097

951

1098

partial writes:

952

when a write is interrupted to the index file, it will result in a line that

953

does not end in ' :'. If the ' :' is not present at the end of a line, or at

954

the end of the file, then the record that is missing it will be ignored by

955

the parser.

1099

when a write is interrupted to the index file, it will result in a line

1100

that does not end in ' :'. If the ' :' is not present at the end of a line,

1101

or at the end of the file, then the record that is missing it will be

1102

ignored by the parser.

956

1103

957

When writing new records to the index file, the data is preceeded by '\n'

1104

When writing new records to the index file, the data is preceded by '\n'

958

1105

to ensure that records always start on new lines even if the last write was

959

1106

interrupted. As a result its normal for the last line in the index to be

960

1107

missing a trailing newline. One can be added with no harmful effects.

967

1114

968

1115

def _cache_version(self, version_id, options, pos, size, parents):

969

1116

"""Cache a version record in the history array and index cache.

970

971

This is inlined into __init__ for performance. KEEP IN SYNC.

1117

1118

This is inlined into _load_data for performance. KEEP IN SYNC.

972

1119

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

973

1120

indexes).

974

1121

"""

979

1126

self._history.append(version_id)

980

1127

else:

981

1128

index = self._cache[version_id][5]

982

self._cache[version_id] = (version_id,

1129

self._cache[version_id] = (version_id,

983

1130

options,

984

1131

pos,

985

1132

size,

986

1133

parents,

987

1134

index)

988

1135

989

def __init__(self, transport, filename, mode, create=False, file_mode=None):

990

_KnitComponentFile.__init__(self, transport, filename, mode, file_mode)

1136

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1137

create_parent_dir=False, delay_create=False, dir_mode=None):

1138

_KnitComponentFile.__init__(self, transport, filename, mode,

1139

file_mode=file_mode,

1140

create_parent_dir=create_parent_dir,

1141

dir_mode=dir_mode)

991

1142

self._cache = {}

992

1143

# position in _history is the 'official' index for a revision

993

1144

# but the values may have come from a newer entry.

994

# so - wc -l of a knit index is != the number of uniqe names

995

# in the weave.

1145

# so - wc -l of a knit index is != the number of unique names

1146

# in the knit.

996

1147

self._history = []

997

pb = bzrlib.ui.ui_factory.nested_progress_bar()

998

1148

try:

999

count = 0

1000

total = 1

1149

fp = self._transport.get(self._filename)

1001

1150

try:

1002

pb.update('read knit index', count, total)

1003

fp = self._transport.get(self._filename)

1004

self.check_header(fp)

1005

# readlines reads the whole file at once:

1006

# bad for transports like http, good for local disk

1007

# we save 60 ms doing this one change (

1008

# from calling readline each time to calling

1009

# readlines once.

1010

# probably what we want for nice behaviour on

1011

# http is a incremental readlines that yields, or

1012

# a check for local vs non local indexes,

1013

for l in fp.readlines():

1014

rec = l.split()

1015

if len(rec) < 5 or rec[-1] != ':':

1016

# corrupt line.

1017

# FIXME: in the future we should determine if its a

1018

# short write - and ignore it

1019

# or a different failure, and raise. RBC 20060407

1020

continue

1021

count += 1

1022

total += 1

1023

#pb.update('read knit index', count, total)

1024

# See self._parse_parents

1025

parents = []

1026

for value in rec[4:-1]:

1027

if '.' == value[0]:

1028

# uncompressed reference

1029

parents.append(value[1:])

1030

else:

1031

# this is 15/4000ms faster than isinstance,

1032

# (in lsprof)

1033

# this function is called thousands of times a

1034

# second so small variations add up.

1035

assert value.__class__ is str

1036

parents.append(self._history[int(value)])

1037

# end self._parse_parents

1038

# self._cache_version(rec[0],

1039

# rec[1].split(','),

1040

# int(rec[2]),

1041

# int(rec[3]),

1042

# parents)

1043

# --- self._cache_version

1044

# only want the _history index to reference the 1st

1045

# index entry for version_id

1046

version_id = rec[0]

1047

if version_id not in self._cache:

1048

index = len(self._history)

1049

self._history.append(version_id)

1050

else:

1051

index = self._cache[version_id][5]

1052

self._cache[version_id] = (version_id,

1053

rec[1].split(','),

1054

int(rec[2]),

1055

int(rec[3]),

1056

parents,

1057

index)

1058

# --- self._cache_version

1059

except NoSuchFile, e:

1060

if mode != 'w' or not create:

1061

raise

1062

self.write_header()

1063

finally:

1064

pb.update('read knit index', total, total)

1065

pb.finished()

1066

1067

def _parse_parents(self, compressed_parents):

1068

"""convert a list of string parent values into version ids.

1069

1070

ints are looked up in the index.

1071

.FOO values are ghosts and converted in to FOO.

1072

1073

NOTE: the function is retained here for clarity, and for possible

1074

use in partial index reads. However bulk processing now has

1075

it inlined in __init__ for inner-loop optimisation.

1076

"""

1077

result = []

1078

for value in compressed_parents:

1079

if value[-1] == '.':

1080

# uncompressed reference

1081

result.append(value[1:])

1082

else:

1083

# this is 15/4000ms faster than isinstance,

1084

# this function is called thousands of times a

1085

# second so small variations add up.

1086

assert value.__class__ is str

1087

result.append(self._history[int(value)])

1088

return result

1151

# _load_data may raise NoSuchFile if the target knit is

1152

# completely empty.

1153

self._load_data(fp)

1154

finally:

1155

fp.close()

1156

except NoSuchFile:

1157

if mode != 'w' or not create:

1158

raise

1159

elif delay_create:

1160

self._need_to_create = True

1161

else:

1162

self._transport.put_bytes_non_atomic(

1163

self._filename, self.HEADER, mode=self._file_mode)

1164

1165

def _load_data(self, fp):

1166

cache = self._cache

1167

history = self._history

1168

1169

self.check_header(fp)

1170

# readlines reads the whole file at once:

1171

# bad for transports like http, good for local disk

1172

# we save 60 ms doing this one change (

1173

# from calling readline each time to calling

1174

# readlines once.

1175

# probably what we want for nice behaviour on

1176

# http is a incremental readlines that yields, or

1177

# a check for local vs non local indexes,

1178

history_top = len(history) - 1

1179

for line in fp.readlines():

1180

rec = line.split()

1181

if len(rec) < 5 or rec[-1] != ':':

1182

# corrupt line.

1183

# FIXME: in the future we should determine if its a

1184

# short write - and ignore it

1185

# or a different failure, and raise. RBC 20060407

1186

continue

1187

1188

parents = []

1189

for value in rec[4:-1]:

1190

if value[0] == '.':

1191

# uncompressed reference

1192

parent_id = value[1:]

1193

else:

1194

parent_id = history[int(value)]

1195

parents.append(parent_id)

1196

1197

version_id, options, pos, size = rec[:4]

1198

version_id = version_id

1199

1200

# See self._cache_version

1201

# only want the _history index to reference the 1st

1202

# index entry for version_id

1203

if version_id not in cache:

1204

history_top += 1

1205

index = history_top

1206

history.append(version_id)

1207

else:

1208

index = cache[version_id][5]

1209

cache[version_id] = (version_id,

1210

options.split(','),

1211

int(pos),

1212

int(size),

1213

parents,

1214

index)

1215

# end self._cache_version

1089

1216

1090

1217

def get_graph(self):

1091

graph = []

1092

for version_id, index in self._cache.iteritems():

1093

graph.append((version_id, index[4]))

1094

return graph

1218

return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

1095

1219

1096

1220

def get_ancestry(self, versions):

1097

1221

"""See VersionedFile.get_ancestry."""

1098

1222

# get a graph of all the mentioned versions:

1099

1223

graph = {}

1100

1224

pending = set(versions)

1101

while len(pending):

1225

cache = self._cache

1226

while pending:

1102

1227

version = pending.pop()

1103

parents = self._cache[version][4]

1104

# got the parents ok

1105

1228

# trim ghosts

1106

parents = [parent for parent in parents if parent in self._cache]

1107

for parent in parents:

1108

# if not completed and not a ghost

1109

if parent not in graph:

1110

pending.add(parent)

1229

try:

1230

parents = [p for p in cache[version][4] if p in cache]

1231

except KeyError:

1232

raise RevisionNotPresent(version, self._filename)

1233

# if not completed and not a ghost

1234

pending.update([p for p in parents if p not in graph])

1111

1235

graph[version] = parents

1112

1236

return topo_sort(graph.items())

1113

1237

1114

1238

def get_ancestry_with_ghosts(self, versions):

1115

1239

"""See VersionedFile.get_ancestry_with_ghosts."""

1116

1240

# get a graph of all the mentioned versions:

1241

self.check_versions_present(versions)

1242

cache = self._cache

1117

1243

graph = {}

1118

1244

pending = set(versions)

1119

while len(pending):

1245

while pending:

1120

1246

version = pending.pop()

1121

1247

try:

1122

parents = self._cache[version][4]

1248

parents = cache[version][4]

1123

1249

except KeyError:

1124

1250

# ghost, fake it

1125

1251

graph[version] = []

1126

pass

1127

1252

else:

1128

# got the parents ok

1129

for parent in parents:

1130

if parent not in graph:

1131

pending.add(parent)

1253

# if not completed

1254

pending.update([p for p in parents if p not in graph])

1132

1255

graph[version] = parents

1133

1256

return topo_sort(graph.items())

1134

1257

1149

1272

1150

1273

def _version_list_to_index(self, versions):

1151

1274

result_list = []

1275

cache = self._cache

1152

1276

for version in versions:

1153

if version in self._cache:

1277

if version in cache:

1154

1278

# -- inlined lookup() --

1155

result_list.append(str(self._cache[version][5]))

1279

result_list.append(str(cache[version][5]))

1156

1280

# -- end lookup () --

1157

1281

else:

1158

result_list.append('.' + version.encode('utf-8'))

1282

result_list.append('.' + version)

1159

1283

return ' '.join(result_list)

1160

1284

1161

1285

def add_version(self, version_id, options, pos, size, parents):

1169

1293

(version_id, options, pos, size, parents).

1170

1294

"""

1171

1295

lines = []

1172

for version_id, options, pos, size, parents in versions:

1173

line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),

1174

','.join(options),

1175

pos,

1176

size,

1177

self._version_list_to_index(parents))

1178

assert isinstance(line, str), \

1179

'content must be utf-8 encoded: %r' % (line,)

1180

lines.append(line)

1181

self._transport.append(self._filename, StringIO(''.join(lines)))

1182

# cache after writing, so that a failed write leads to missing cache

1183

# entries not extra ones. XXX TODO: RBC 20060502 in the event of a

1184

# failure, reload the index or flush it or some such, to prevent

1185

# writing records that did complete twice.

1186

for version_id, options, pos, size, parents in versions:

1187

self._cache_version(version_id, options, pos, size, parents)

1188

1296

orig_history = self._history[:]

1297

orig_cache = self._cache.copy()

1298

1299

try:

1300

for version_id, options, pos, size, parents in versions:

1301

line = "\n%s %s %s %s %s :" % (version_id,

1302

','.join(options),

1303

pos,

1304

size,

1305

self._version_list_to_index(parents))

1306

assert isinstance(line, str), \

1307

'content must be utf-8 encoded: %r' % (line,)

1308

lines.append(line)

1309

self._cache_version(version_id, options, pos, size, parents)

1310

if not self._need_to_create:

1311

self._transport.append_bytes(self._filename, ''.join(lines))

1312

else:

1313

sio = StringIO()

1314

sio.write(self.HEADER)

1315

sio.writelines(lines)

1316

sio.seek(0)

1317

self._transport.put_file_non_atomic(self._filename, sio,

1318

create_parent_dir=self._create_parent_dir,

1319

mode=self._file_mode,

1320

dir_mode=self._dir_mode)

1321

self._need_to_create = False

1322

except:

1323

# If any problems happen, restore the original values and re-raise

1324

self._history = orig_history

1325

self._cache = orig_cache

1326

raise

1327

1189

1328

def has_version(self, version_id):

1190

1329

"""True if the version is in the index."""

1191

return self._cache.has_key(version_id)

1330

return version_id in self._cache

1192

1331

1193

1332

def get_position(self, version_id):

1194

1333

"""Return data position and size of specified version."""

1195

return (self._cache[version_id][2], \

1196

self._cache[version_id][3])

1334

entry = self._cache[version_id]

1335

return entry[2], entry[3]

1197

1336

1198

1337

def get_method(self, version_id):

1199

1338

"""Return compression method of specified version."""

1201

1340

if 'fulltext' in options:

1202

1341

return 'fulltext'

1203

1342

else:

1204

assert 'line-delta' in options

1343

if 'line-delta' not in options:

1344

raise errors.KnitIndexUnknownMethod(self._full_path(), options)

1205

1345

return 'line-delta'

1206

1346

1207

1347

def get_options(self, version_id):

1213

1353

if parent in self._cache]

1214

1354

1215

1355

def get_parents_with_ghosts(self, version_id):

1216

"""Return parents of specified version wth ghosts."""

1356

"""Return parents of specified version with ghosts."""

1217

1357

return self._cache[version_id][4]

1218

1358

1219

1359

def check_versions_present(self, version_ids):

1220

1360

"""Check that all specified versions are present."""

1221

version_ids = set(version_ids)

1222

for version_id in list(version_ids):

1223

if version_id in self._cache:

1224

version_ids.remove(version_id)

1225

if version_ids:

1226

raise RevisionNotPresent(list(version_ids)[0], self.filename)

1361

cache = self._cache

1362

for version_id in version_ids:

1363

if version_id not in cache:

1364

raise RevisionNotPresent(version_id, self._filename)

1227

1365

1228

1366

1229

1367

class _KnitData(_KnitComponentFile):

1230

1368

"""Contents of the knit data file"""

1231

1369

1232

HEADER = "# bzr knit data 8\n"

1233

1234

def __init__(self, transport, filename, mode, create=False, file_mode=None):

1235

_KnitComponentFile.__init__(self, transport, filename, mode)

1236

self._file = None

1370

def __init__(self, transport, filename, mode, create=False, file_mode=None,

1371

create_parent_dir=False, delay_create=False,

1372

dir_mode=None):

1373

_KnitComponentFile.__init__(self, transport, filename, mode,

1374

file_mode=file_mode,

1375

create_parent_dir=create_parent_dir,

1376

dir_mode=dir_mode)

1237

1377

self._checked = False

1378

# TODO: jam 20060713 conceptually, this could spill to disk

1379

# if the cached size gets larger than a certain amount

1380

# but it complicates the model a bit, so for now just use

1381

# a simple dictionary

1382

self._cache = {}

1383

self._do_cache = False

1238

1384

if create:

1239

self._transport.put(self._filename, StringIO(''), mode=file_mode)

1240

self._records = {}

1385

if delay_create:

1386

self._need_to_create = create

1387

else:

1388

self._transport.put_bytes_non_atomic(self._filename, '',

1389

mode=self._file_mode)

1390

1391

def enable_cache(self):

1392

"""Enable caching of reads."""

1393

self._do_cache = True

1241

1394

1242

1395

def clear_cache(self):

1243

1396

"""Clear the record cache."""

1244

self._records = {}

1397

self._do_cache = False

1398

self._cache = {}

1245

1399

1246

1400

def _open_file(self):

1247

if self._file is None:

1248

try:

1249

self._file = self._transport.get(self._filename)

1250

except NoSuchFile:

1251

pass

1252

return self._file

1401

try:

1402

return self._transport.get(self._filename)

1403

except NoSuchFile:

1404

pass

1405

return None

1253

1406

1254

1407

def _record_to_data(self, version_id, digest, lines):

1255

1408

"""Convert version_id, digest, lines into a raw data block.

1258

1411

"""

1259

1412

sio = StringIO()

1260

1413

data_file = GzipFile(None, mode='wb', fileobj=sio)

1414

1415

assert isinstance(version_id, str)

1261

1416

data_file.writelines(chain(

1262

["version %s %d %s\n" % (version_id.encode('utf-8'),

1417

["version %s %d %s\n" % (version_id,

1263

1418

len(lines),

1264

1419

digest)],

1265

1420

lines,

1266

["end %s\n" % version_id.encode('utf-8')]))

1421

["end %s\n" % version_id]))

1267

1422

data_file.close()

1268

1423

length= sio.tell()

1269

1424

1276

1431

:return: the offset in the data file raw_data was written.

1277

1432

"""

1278

1433

assert isinstance(raw_data, str), 'data must be plain bytes'

1279

return self._transport.append(self._filename, StringIO(raw_data))

1434

if not self._need_to_create:

1435

return self._transport.append_bytes(self._filename, raw_data)

1436

else:

1437

self._transport.put_bytes_non_atomic(self._filename, raw_data,

1438

create_parent_dir=self._create_parent_dir,

1439

mode=self._file_mode,

1440

dir_mode=self._dir_mode)

1441

self._need_to_create = False

1442

return 0

1280

1443

1281

1444

def add_record(self, version_id, digest, lines):

1282

1445

"""Write new text record to disk. Returns the position in the

1283

1446

file where it was written."""

1284

1447

size, sio = self._record_to_data(version_id, digest, lines)

1285

# cache

1286

self._records[version_id] = (digest, lines)

1287

1448

# write to disk

1288

start_pos = self._transport.append(self._filename, sio)

1449

if not self._need_to_create:

1450

start_pos = self._transport.append_file(self._filename, sio)

1451

else:

1452

self._transport.put_file_non_atomic(self._filename, sio,

1453

create_parent_dir=self._create_parent_dir,

1454

mode=self._file_mode,

1455

dir_mode=self._dir_mode)

1456

self._need_to_create = False

1457

start_pos = 0

1458

if self._do_cache:

1459

self._cache[version_id] = sio.getvalue()

1289

1460

return start_pos, size

1290

1461

1291

1462

def _parse_record_header(self, version_id, raw_data):

1295

1466

as (stream, header_record)

1296

1467

"""

1297

1468

df = GzipFile(mode='rb', fileobj=StringIO(raw_data))

1298

rec = df.readline().split()

1469

rec = self._check_header(version_id, df.readline())

1470

return df, rec

1471

1472

def _check_header(self, version_id, line):

1473

rec = line.split()

1299

1474

if len(rec) != 4:

1300

raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')

1301

if rec[1].decode('utf-8')!= version_id:

1302

raise KnitCorrupt(self._filename,

1303

'unexpected version, wanted %r, got %r' % (

1304

version_id, rec[1]))

1305

return df, rec

1475

raise KnitCorrupt(self._filename,

1476

'unexpected number of elements in record header')

1477

if rec[1] != version_id:

1478

raise KnitCorrupt(self._filename,

1479

'unexpected version, wanted %r, got %r'

1480

% (version_id, rec[1]))

1481

return rec

1306

1482

1307

1483

def _parse_record(self, version_id, data):

1308

1484

# profiling notes:

1309

1485

# 4168 calls in 2880 217 internal

1310

1486

# 4168 calls to _parse_record_header in 2121

1311

1487

# 4168 calls to readlines in 330

1312

df, rec = self._parse_record_header(version_id, data)

1488

df = GzipFile(mode='rb', fileobj=StringIO(data))

1489

1313

1490

record_contents = df.readlines()

1314

l = record_contents.pop()

1491

header = record_contents.pop(0)

1492

rec = self._check_header(version_id, header)

1493

1494

last_line = record_contents.pop()

1315

1495

assert len(record_contents) == int(rec[2])

1316

if l.decode('utf-8') != 'end %s\n' % version_id:

1317

raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'

1318

% (l, version_id))

1496

if last_line != 'end %s\n' % rec[1]:

1497

raise KnitCorrupt(self._filename,

1498

'unexpected version end line %r, wanted %r'

1499

% (last_line, version_id))

1319

1500

df.close()

1320

1501

return record_contents, rec[3]

1321

1502

1324

1505

1325

1506

This unpacks enough of the text record to validate the id is

1326

1507

as expected but thats all.

1327

1328

It will actively recompress currently cached records on the

1329

basis that that is cheaper than I/O activity.

1330

1508

"""

1331

needed_records = []

1332

for version_id, pos, size in records:

1333

if version_id not in self._records:

1334

needed_records.append((version_id, pos, size))

1335

1336

1509

# setup an iterator of the external records:

1337

1510

# uses readv so nice and fast we hope.

1338

if len(needed_records):

1511

if len(records):

1339

1512

# grab the disk data needed.

1340

raw_records = self._transport.readv(self._filename,

1341

[(pos, size) for version_id, pos, size in needed_records])

1513

if self._cache:

1514

# Don't check _cache if it is empty

1515

needed_offsets = [(pos, size) for version_id, pos, size

1516

in records

1517

if version_id not in self._cache]

1518

else:

1519

needed_offsets = [(pos, size) for version_id, pos, size

1520

in records]

1521

1522

raw_records = self._transport.readv(self._filename, needed_offsets)

1342

1523

1343

1524

for version_id, pos, size in records:

1344

if version_id in self._records:

1345

# compress a new version

1346

size, sio = self._record_to_data(version_id,

1347

self._records[version_id][0],

1348

self._records[version_id][1])

1349

yield version_id, sio.getvalue()

1525

if version_id in self._cache:

1526

# This data has already been validated

1527

data = self._cache[version_id]

1350

1528

else:

1351

1529

pos, data = raw_records.next()

1530

if self._do_cache:

1531

self._cache[version_id] = data

1532

1352

1533

# validate the header

1353

1534

df, rec = self._parse_record_header(version_id, data)

1354

1535

df.close()

1355

yield version_id, data

1356

1536

yield version_id, data

1357

1537

1358

1538

def read_records_iter(self, records):

1359

1539

"""Read text records from data file and yield result.

1360

1540

1361

Each passed record is a tuple of (version_id, pos, len) and

1362

will be read in the given order. Yields (version_id,

1363

contents, digest).

1541

The result will be returned in whatever is the fastest to read.

1542

Not by the order requested. Also, multiple requests for the same

1543

record will only yield 1 response.

1544

:param records: A list of (version_id, pos, len) entries

1545

:return: Yields (version_id, contents, digest) in the order

1546

read, not the order requested

1364

1547

"""

1365

# profiling notes:

1366

# 60890 calls for 4168 extractions in 5045, 683 internal.

1367

# 4168 calls to readv in 1411

1368

# 4168 calls to parse_record in 2880

1369

1370

needed_records = []

1371

for version_id, pos, size in records:

1372

if version_id not in self._records:

1373

needed_records.append((version_id, pos, size))

1374

1375

if len(needed_records):

1376

# We take it that the transport optimizes the fetching as good

1377

# as possible (ie, reads continous ranges.)

1378

response = self._transport.readv(self._filename,

1379

[(pos, size) for version_id, pos, size in needed_records])

1380

1381

for (record_id, pos, size), (pos, data) in izip(iter(needed_records), response):

1382

content, digest = self._parse_record(record_id, data)

1383

self._records[record_id] = (digest, content)

1384

1385

for version_id, pos, size in records:

1386

yield version_id, list(self._records[version_id][1]), self._records[version_id][0]

1548

if not records:

1549

return

1550

1551

if self._cache:

1552

# Skip records we have alread seen

1553

yielded_records = set()

1554

needed_records = set()

1555

for record in records:

1556

if record[0] in self._cache:

1557

if record[0] in yielded_records:

1558

continue

1559

yielded_records.add(record[0])

1560

data = self._cache[record[0]]

1561

content, digest = self._parse_record(record[0], data)

1562

yield (record[0], content, digest)

1563

else:

1564

needed_records.add(record)

1565

needed_records = sorted(needed_records, key=operator.itemgetter(1))

1566

else:

1567

needed_records = sorted(set(records), key=operator.itemgetter(1))

1568

1569

if not needed_records:

1570

return

1571

1572

# The transport optimizes the fetching as well

1573

# (ie, reads continuous ranges.)

1574

readv_response = self._transport.readv(self._filename,

1575

[(pos, size) for version_id, pos, size in needed_records])

1576

1577

for (version_id, pos, size), (pos, data) in \

1578

izip(iter(needed_records), readv_response):

1579

content, digest = self._parse_record(version_id, data)

1580

if self._do_cache:

1581

self._cache[version_id] = data

1582

yield version_id, content, digest

1387

1583

1388

1584

def read_records(self, records):

1389

1585

"""Read records into a dictionary."""

1390

1586

components = {}

1391

for record_id, content, digest in self.read_records_iter(records):

1587

for record_id, content, digest in \

1588

self.read_records_iter(records):

1392

1589

components[record_id] = (content, digest)

1393

1590

return components

1394

1591

1418

1615

if not version_ids:

1419

1616

return 0

1420

1617

1421

pb = bzrlib.ui.ui_factory.nested_progress_bar()

1618

pb = ui.ui_factory.nested_progress_bar()

1422

1619

try:

1423

1620

version_ids = list(version_ids)

1424

1621

if None in version_ids:

1469

1666

# if source has the parent, we must :

1470

1667

# * already have it or

1471

1668

# * have it scheduled already

1472

# otherwise we dont care

1669

# otherwise we don't care

1473

1670

assert (self.target.has_version(parent) or

1474

1671

parent in copy_set or

1475

1672

not self.source.has_version(parent))

1535

1732

if not version_ids:

1536

1733

return 0

1537

1734

1538

pb = bzrlib.ui.ui_factory.nested_progress_bar()

1735

pb = ui.ui_factory.nested_progress_bar()

1539

1736

try:

1540

1737

version_ids = list(version_ids)

1541

1738

1603

1800

InterVersionedFile.register_optimiser(WeaveToKnit)

1604

1801

1605

1802

1606

class SequenceMatcher(difflib.SequenceMatcher):

1803

class KnitSequenceMatcher(difflib.SequenceMatcher):

1607

1804

"""Knit tuned sequence matcher.

1608

1805

1609

1806

This is based on profiling of difflib which indicated some improvements

1680

1877

j2lenget = j2len.get

1681

1878

newj2len = {}

1682

1879

1683

# changing b2j.get(a[i], nothing) to a try:Keyerror pair produced the

1880

# changing b2j.get(a[i], nothing) to a try:KeyError pair produced the

1684

1881

# following improvement

1685

1882

# 704 0 4650.5320 2620.7410 bzrlib.knit:1336(find_longest_match)

1686

1883

# +326674 0 1655.1210 1655.1210 +<method 'get' of 'dict' objects>

1735

1932

bestsize = bestsize + 1

1736

1933

1737

1934

return besti, bestj, bestsize

1738

Older »