        fulltext_size = None
        for count in xrange(self._max_delta_chain):
            try:
                # Note that this only looks in the index of this particular
                # KnitVersionedFiles, not in the fallbacks. This ensures that
                # we won't store a delta spanning physical repository
                # boundaries.
                build_details = self._index.get_build_details([parent])
                parent_details = build_details[parent]
            except (RevisionNotPresent, KeyError), e:
                # Some basis is not locally present: always fulltext
                return False
            index_memo, compression_parent, _, _ = parent_details
            _, _, size = index_memo
            if compression_parent is None:
                fulltext_size = size
                break
            delta_size += size
            # We don't explicitly check for presence because this is in an
            # inner loop, and if it's missing it'll fail anyhow.
            parent = compression_parent
        else:
            # We couldn't find a fulltext, so we must create a new one
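        # Worked illustration (assumed sizes, not taken from the original
        # source): the loop above walks back along the compression-parent
        # chain, adding each delta's stored size to delta_size until it hits
        # a fulltext, whose size is captured in fulltext_size. With deltas of
        # 700 and 900 bytes built on a 1200-byte fulltext, delta_size ends up
        # at 1600 and fulltext_size at 1200, and the method can then weigh
        # the cost of reading the whole chain against storing a new fulltext.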

        keys = list(keys)
        multiple_versions = len(keys) != 1
        record_map = self._get_record_map(keys, allow_missing=True)

        text_map = {}
        content_map = {}
        final_content = {}
        if nonlocal_keys is None:
            nonlocal_keys = set()
        else:
            nonlocal_keys = frozenset(nonlocal_keys)
        missing_keys = set(nonlocal_keys)
        for source in self._fallback_vfs:
            if not missing_keys:
                break
            for record in source.get_record_stream(missing_keys,
                'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                missing_keys.remove(record.key)
                lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
                text_map[record.key] = lines
                content_map[record.key] = PlainKnitContent(lines, record.key)
                if record.key in keys:
                    final_content[record.key] = content_map[record.key]
        for key in keys:
            if key in nonlocal_keys:
                # already handled
                continue
            components = []
            cursor = key
            while cursor is not None:
                try:
                    record, record_details, digest, next = record_map[cursor]
                except KeyError:
                    raise RevisionNotPresent(cursor, self)
                components.append((cursor, record, record_details, digest))
                if cursor in content_map:
                    # no need to plan further back
                    components.append((cursor, None, None, None))
                    break
                cursor = next

            for (component_id, record, record_details,

            text = content.text()
            actual_sha = sha_strings(text)
            if actual_sha != digest:
                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
            text_map[key] = text
        return text_map, final_content
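        # Shape of the return value (hypothetical keys, for illustration):
        # text_map maps each requested key to its list of lines, while
        # final_content maps it to the KnitContent it was built from, e.g.
        #   text_map == {('file-id', 'rev-1'): ['line one\n', 'line two\n']}
        #   final_content == {('file-id', 'rev-1'): <KnitContent instance>}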

    def get_parent_map(self, keys):
        """Get a map of the graph parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        return self._get_parent_map_with_sources(keys)[0]

    def _get_parent_map_with_sources(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A tuple. The first element is a mapping from keys to parents.
            Absent keys are absent from the mapping. The second element is a
            list with the locations each key was found in. The first element
            is the in-this-knit parents, the second the first fallback source,
            and so on.
        """
        result = {}
        sources = [self._index] + self._fallback_vfs
        source_results = []
        missing = set(keys)
        for source in sources:
            if not missing:
                break
            new_result = source.get_parent_map(missing)
            source_results.append(new_result)
            result.update(new_result)
            missing.difference_update(set(new_result))
        return result, source_results
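        # Illustrative call (hypothetical keys, one fallback source, kvf being
        # a KnitVersionedFiles instance):
        #   result, sources = kvf._get_parent_map_with_sources(
        #       [('rev-2',), ('rev-9',)])
        # could yield
        #   result == {('rev-2',): (('rev-1',),)}
        #   sources == [{('rev-2',): (('rev-1',),)}, {}]
        # meaning 'rev-2' was answered by this knit's own index, the fallback
        # was then asked only about 'rev-9', and it knew nothing about it.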

    def _get_record_map(self, keys, allow_missing=False):
        """Produce a dictionary of knit records.

        :return: {key:(record, record_details, digest, next)}
            next
                build-parent of the version, i.e. the leftmost ancestor.
                Will be None if the record is not a delta.
        :param keys: The keys to build a map for
        :param allow_missing: If some records are missing, rather than
            error, just return the data that could be generated.
        """
        # This retries the whole request if anything fails. Potentially we
        # could be a bit more selective. We could track the keys whose records
        # we have successfully found, and then only request the new records
        # from there. However, _get_components_positions grabs the whole build
        # chain, which means we'll likely try to grab the same records again
        # anyway. Also, can the build chains change as part of a pack
        # operation? We wouldn't want to end up with a broken chain.
        while True:
            try:
                position_map = self._get_components_positions(keys,
                    allow_missing=allow_missing)
                # key = component_id, r = record_details, i_m = index_memo,
                # n = next
                records = [(key, i_m) for key, (r, i_m, n)
                           in position_map.iteritems()]
                record_map = {}
                for key, record, digest in self._read_records_iter(records):
                    (record_details, index_memo, next) = position_map[key]
                    record_map[key] = record, record_details, digest, next
                return record_map
            except errors.RetryWithNewPacks, e:
                self._access.reload_or_raise(e)

    def _split_by_prefix(self, keys):
        """For the given keys, split them up based on their prefix.

        To keep memory pressure somewhat under control, split the
        requests back into per-file-id requests, otherwise "bzr co"
        extracts the full tree into memory before writing it to disk.
        This should be revisited if _get_content_maps() can ever cross
        file-id boundaries.

        :param keys: An iterable of key tuples
        :return: A dict of {prefix: [key_list]}
        """
        split_by_prefix = {}
        for key in keys:
            if len(key) == 1:
                split_by_prefix.setdefault('', []).append(key)
            else:
                split_by_prefix.setdefault(key[0], []).append(key)
        return split_by_prefix
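        # Illustrative example (hypothetical keys): prefixed keys are grouped
        # by their file id, so
        #   kvf._split_by_prefix([('f1', 'r1'), ('f2', 'r1'), ('f1', 'r2')])
        # returns
        #   {'f1': [('f1', 'r1'), ('f1', 'r2')], 'f2': [('f2', 'r1')]}
        # whereas length-one keys (an unprefixed knit) all collect under ''.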

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

                    try:
                        chain.append(positions[chain[-1]][2])
                    except KeyError:
                        # missing basis component
                        needed_from_fallback.add(chain[-1])
                        result = True
                        break
                for chain_key in chain[:-1]:
                    reconstructable_keys[chain_key] = result
                if not result:
                    needed_from_fallback.add(key)
        # Double index lookups here : need a unified api ?
        global_map, parent_maps = self._get_parent_map_with_sources(keys)
        if ordering == 'topological':
            # Global topological sort
            present_keys = tsort.topo_sort(global_map)
            # Now group by source:
            source_keys = []
            current_source = None
            for key in present_keys:
                for parent_map in parent_maps:
                    if key in parent_map:
                        key_source = parent_map
                        break
                if current_source is not key_source:
                    source_keys.append((key_source, []))
                    current_source = key_source
                source_keys[-1][1].append(key)
        else:
            if ordering != 'unordered':
                raise AssertionError('valid values for ordering are:'
                    ' "unordered" or "topological" not: %r'
                    % (ordering,))
            # Just group by source; remote sources first.
            present_keys = []
            source_keys = []
            for parent_map in reversed(parent_maps):
                source_keys.append((parent_map, []))
                for key in parent_map:
                    present_keys.append(key)
                    source_keys[-1][1].append(key)
        # We have been requested to return these records in an order that
        # suits us. So we ask the index to give us an optimally sorted
        # order.
        for source, sub_keys in source_keys:
            if source is parent_maps[0]:
                # Only sort the keys for this VF
                self._index._sort_keys_by_io(sub_keys, positions)
        absent_keys = keys - set(global_map)
        for key in absent_keys:
            yield AbsentContentFactory(key)
        # restrict our view to the keys we can answer.
        # XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
        # XXX: At that point we need to consider the impact of double reads by
        # utilising components multiple times.
        if include_delta_closure:
            # XXX: get_content_maps performs its own index queries; allow state
            # to be passed in.
            non_local_keys = needed_from_fallback - absent_keys
            prefix_split_keys = self._split_by_prefix(present_keys)
            prefix_split_non_local_keys = self._split_by_prefix(non_local_keys)
            for prefix, keys in prefix_split_keys.iteritems():
                non_local = prefix_split_non_local_keys.get(prefix, [])
                non_local = set(non_local)
                text_map, _ = self._get_content_maps(keys, non_local)
                for key in keys:
                    lines = text_map.pop(key)
                    yield ChunkedContentFactory(key, global_map[key], None,
                        lines)
        else:
            for source, keys in source_keys:
                if source is parent_maps[0]:
                    # this KnitVersionedFiles
                    records = [(key, positions[key][1]) for key in keys]
                    for key, raw_data, sha1 in self._read_records_iter_raw(records):
                        (record_details, index_memo, _) = positions[key]
                        yield KnitContentFactory(key, global_map[key],
                            record_details, sha1, raw_data, self._factory.annotated, None)
                else:
                    vf = self._fallback_vfs[parent_maps.index(source) - 1]
                    for record in vf.get_record_stream(keys, ordering,
                        include_delta_closure):
                        yield record

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        missing = set(keys)
        record_map = self._get_record_map(missing, allow_missing=True)
        result = {}
        for key, details in record_map.iteritems():
            if key not in missing:
                continue
            # record entry 2 is the 'digest'.
            result[key] = details[2]
        missing.difference_update(set(result))
        for source in self._fallback_vfs:
            if not missing:
                break
            new_result = source.get_sha1s(missing)
            result.update(new_result)
            missing.difference_update(set(new_result))
        return result

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        # can't generate annotations from new deltas until their basis parent
        # is present anyway, so we get away with not needing an index that
        # includes the new keys.
        #
        # See <http://launchpad.net/bugs/300177> about ordering of compression
        # parents in the records - to be conservative, we insist that all
        # parents must be present to avoid expanding to a fulltext.
        #
        # key = basis_parent, value = index entry to add
        buffered_index_entries = {}
        for record in stream:
            parents = record.parents
            if record.storage_kind in delta_types:
                # TODO: eventually the record itself should track
                #       compression_parent
                compression_parent = parents[0]
            else:
                compression_parent = None
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise RevisionNotPresent([record.key], self)
            elif ((record.storage_kind in knit_types)
                and (compression_parent is None
                    or not self._fallback_vfs
                    or self._index.has_key(compression_parent)
                    or not self.has_key(compression_parent))):
                # we can insert the knit record literally if either it has no
                # compression parent OR we already have its basis in this kvf
                # OR the basis is not present even in the fallbacks. In the
                # last case it will either turn up later in the stream and all
                # will be well, or it won't turn up at all and we'll raise an
                # error at the end.
                #
                # TODO: self.has_key is somewhat redundant with
                # self._index.has_key; we really want something that directly
                # asks if it's only present in the fallbacks. -- mbp 20081119
                if record.storage_kind not in native_types:
                    adapter_key = (record.storage_kind, "knit-delta-gz")

                index_entry = (record.key, options, access_memo, parents)
                buffered = False
                if 'fulltext' not in options:
                    # Not a fulltext, so we need to make sure the compression
                    # parent will also be present.
                    # Note that pack backed knits don't need to buffer here
                    # because they buffer all writes to the transaction level,
                    # but we don't expose that difference at the index level. If
                    # the query here has sufficient cost to show up in
                    # profiling we should do that.
                    #
                    # They're required to be physically in this
                    # KnitVersionedFiles, not in a fallback.
                    if not self._index.has_key(compression_parent):
                        pending = buffered_index_entries.setdefault(
                            compression_parent, [])
                        pending.append(index_entry)
                        buffered = True
                if not buffered:
                    self._index.add_records([index_entry])
            elif record.storage_kind == 'chunked':
                self.add_lines(record.key, parents,
                    osutils.chunks_to_lines(record.get_bytes_as('chunked')))
            elif record.storage_kind == 'fulltext':
                self.add_lines(record.key, parents,
                    split_lines(record.get_bytes_as('fulltext')))
            else:
                # Not a fulltext, and not suitable for direct insertion as a
                # delta, either because it's not the right format, or this
                # KnitVersionedFiles doesn't permit deltas (_max_delta_chain ==
                # 0) or because it depends on a base only present in the
                # fallback kvfs.
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                lines = split_lines(adapter.get_bytes(

        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \\n
           terminators.
         * Lines are returned in arbitrary order.
         * If a requested key did not change any lines (or didn't have any
           lines), it may not be mentioned at all in the result.

        :return: An iterator over (line, key).
        """
        if pb is None:
            pb = progress.DummyProgress()
        keys = set(keys)
        total = len(keys)
        done = False
        while not done:
            try:
                # we don't care about inclusions, the caller cares.
                # but we need to setup a list of records to visit.
                # we need key, position, length
                key_records = []
                build_details = self._index.get_build_details(keys)
                for key, details in build_details.iteritems():
                    key_records.append((key, details[0]))
                records_iter = enumerate(self._read_records_iter(key_records))
                for (key_idx, (key, data, sha_value)) in records_iter:
                    pb.update('Walking content.', key_idx, total)
                    compression_parent = build_details[key][1]
                    if compression_parent is None:
                        line_iterator = self._factory.get_fulltext_content(data)
                    else:
                        line_iterator = self._factory.get_linedelta_content(data)
                    # Now that we are yielding the data for this key, remove it
                    # from the list
                    keys.remove(key)
                    # XXX: It might be more efficient to yield (key,
                    # line_iterator) in the future. However for now, this is a
                    # simpler change to integrate into the rest of the
                    # codebase. RBC 20071110
                    for line in line_iterator:
                        yield line, key
                done = True
            except errors.RetryWithNewPacks, e:
                self._access.reload_or_raise(e)
        # If there are still keys we've not yet found, we look in the fallback
        # vfs, and hope to find them there. Note that if the keys are found
        # but had no changes or no content, the fallback may not return
        # anything.
        if keys and not self._fallback_vfs:
            # XXX: strictly the second parameter is meant to be the file id
            # but it's not easily accessible here.
            raise RevisionNotPresent(keys, repr(self))
        for source in self._fallback_vfs:
            if not keys:
                break
            source_keys = set()
            for line, key in source.iter_lines_added_or_present_in_keys(keys):
                source_keys.add(key)
                yield line, key
            keys.difference_update(source_keys)
        pb.update('Walking content.', total, total)

    def _make_line_delta(self, delta_seq, new_content):

        self._mode = 'r'

    def _sort_keys_by_io(self, keys, positions):
        """Figure out an optimal order to read the records for the given keys.

        Sort keys, grouped by index and sorted by position.

        :param keys: A list of keys whose records we want to read. This will be
            sorted 'in-place'.
        :param positions: A dict, such as the one returned by
            _get_components_positions()
        """
        def get_sort_key(key):
            index_memo = positions[key][1]
            # Group by prefix and position. index_memo[0] is the key, so it is
            # (file_id, revision_id) and we don't want to sort on revision_id,
            # index_memo[1] is the position, and index_memo[2] is the size,
            # which doesn't matter for the sort
            return index_memo[0][:-1], index_memo[1]
        return keys.sort(key=get_sort_key)
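        # Illustrative ordering (hypothetical keys and positions): given
        # positions placing ('f1', 'r1') at offset 0, ('f1', 'r2') at 120 and
        # ('f2', 'r1') at 40, the list
        #   [('f1', 'r2'), ('f2', 'r1'), ('f1', 'r1')]
        # is rearranged to
        #   [('f1', 'r1'), ('f1', 'r2'), ('f2', 'r1')]
        # i.e. grouped by prefix first, then by byte position. list.sort()
        # works in place, so the return value is None and callers use the
        # mutated list.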

    def _split_key(self, key):
        """Split key into a prefix and suffix."""
        return key[:-1], key[-1]

        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    missing_keys = _mod_index._missing_keys_from_parent_map

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def _sort_keys_by_io(self, keys, positions):
        """Figure out an optimal order to read the records for the given keys.

        Sort keys, grouped by index and sorted by position.

        :param keys: A list of keys whose records we want to read. This will be
            sorted 'in-place'.
        :param positions: A dict, such as the one returned by
            _get_components_positions()
        """
        def get_index_memo(key):
            # index_memo is at offset [1]. It is made up of (GraphIndex,
            # position, size). GI is an object, which will be unique for each
            # pack file. This causes us to group by pack file, then sort by
            # position. Size doesn't matter, but it isn't worth breaking up the
            # sort.
            return positions[key][1]
        return keys.sort(key=get_index_memo)
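        # Same idea as the _KndxIndex variant above, but the grouping key is
        # the GraphIndex object itself, so requests cluster per pack file
        # before being ordered by offset within that pack. The sort is again
        # in place; the None return value is not meant to be used.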


class _KnitKeyAccess(object):
    """Access to records in .knit files."""

        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            try:
                transport, path = self._indices[index]
            except KeyError:
                # A KeyError here indicates that someone has triggered an index
                # reload, and this index has gone missing, we need to start
                # over.
                if self._reload_func is None:
                    # If we don't have a _reload_func there is nothing that can
                    # be done here, so we fail.
                    raise
                raise errors.RetryWithNewPacks(index,
                    reload_occurred=True,
                    exc_info=sys.exc_info())
            try:
                reader = pack.make_readv_reader(transport, path, offsets)
                for names, read_func in reader.iter_records():
                    yield read_func(None)
            except errors.NoSuchFile:
                # A NoSuchFile error indicates that a pack file has gone
                # missing on disk, we need to trigger a reload, and start over.
                if self._reload_func is None:
                    raise
                raise errors.RetryWithNewPacks(transport.abspath(path),
                    reload_occurred=False,
                    exc_info=sys.exc_info())

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data."""
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index

    def reload_or_raise(self, retry_exc):
        """Try calling the reload function, or re-raise the original exception.

        This should be called after _DirectPackAccess raises a
        RetryWithNewPacks exception. This function will handle the common logic
        of determining when the error is fatal versus being temporary.
        It will also make sure that the original exception is raised, rather
        than the RetryWithNewPacks exception.

        If this function returns, then the calling function should retry
        whatever operation was being performed. Otherwise an exception will
        be raised.

        :param retry_exc: A RetryWithNewPacks exception.
        """
        is_error = False
        if self._reload_func is None:
            is_error = True
        elif not self._reload_func():
            # The reload claimed that nothing changed
            if not retry_exc.reload_occurred:
                # If there wasn't an earlier reload, then we really were
                # expecting to find changes. We didn't find them, so this is a
                # hard error.
                is_error = True
        if is_error:
            exc_class, exc_value, exc_traceback = retry_exc.exc_info
            raise exc_class, exc_value, exc_traceback
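        # Sketch of the intended calling pattern (mirroring how this module
        # uses it in _get_record_map and the annotator; do_pack_read is a
        # hypothetical stand-in for whatever read was attempted):
        #   while True:
        #       try:
        #           return do_pack_read()
        #       except errors.RetryWithNewPacks, e:
        #           access.reload_or_raise(e)
        #           # reload_or_raise either re-raised, or we retry the read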

# Deprecated, use PatienceSequenceMatcher instead
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher


        :param key: The key to annotate.
        """
        if len(self._knit._fallback_vfs) > 0:
            # stacked knits can't use the fast path at present.
            return self._simple_annotate(key)
        while True:
            try:
                records = self._get_build_graph(key)
                if key in self._ghosts:
                    raise errors.RevisionNotPresent(key, self._knit)
                self._annotate_records(records)
                return self._annotated_lines[key]
            except errors.RetryWithNewPacks, e:
                self._knit._access.reload_or_raise(e)
                # The cached build_details are no longer valid
                self._all_build_details.clear()

    def _simple_annotate(self, key):
        """Return annotated fulltext, rediffing from the full texts.

        This is slow but makes no assumptions about the repository
        being able to produce line deltas.
        """
        # TODO: this code generates a parent maps of present ancestors; it
        # could be split out into a separate method, and probably should use
        # iter_ancestry instead. -- mbp and robertc 20080704
        graph = _mod_graph.Graph(self._knit)
        head_cache = _mod_graph.FrozenHeadsCache(graph)
        search = graph._make_breadth_first_searcher([key])
        keys = set()
        while True:
            try:
                present, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(present)
        parent_map = self._knit.get_parent_map(keys)
        parent_cache = {}
        reannotate = annotate.reannotate
        for record in self._knit.get_record_stream(keys, 'topological', True):
            key = record.key
            fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
            parents = parent_map[key]
            if parents is not None:
                parent_lines = [parent_cache[parent] for parent in parent_map[key]]
            else:
                parent_lines = []
            parent_cache[key] = list(
                reannotate(parent_lines, fulltext, key, None, head_cache))
        try:
            return parent_cache[key]
        except KeyError, e:
            raise errors.RevisionNotPresent(key, self._knit)