~bzr-pqm/bzr/bzr.dev

Committer: John Arbash Meinel
Date: 2008-07-11 21:41:24 UTC
mto: This revision was merged to the branch mainline in revision 3543.
Revision ID: john@arbash-meinel.com-20080711214124-qi09irlj7pd5cuzg

Shortcut the case when one revision is in the ancestry of the other.

At the cost of a heads() check, when one parent supersedes the other, we don't have to
extract the text for the superseded parent. This changes merge time from 3m37s to 3m21s.
Using a CachingParentsProvider would drop the time down to 3m11s.

files added:
tools/win32/survey.txt

files removed:
bzrlib/_btree_serializer_c.pyx

bzrlib/_btree_serializer_py.py

bzrlib/_chunks_to_lines_py.py

bzrlib/_chunks_to_lines_pyx.pyx

bzrlib/_readdir_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/_walkdirs_win32.pyx

bzrlib/btree_index.py

bzrlib/chunk_writer.py

bzrlib/fifo_cache.py

bzrlib/foreign.py

bzrlib/help_topics/en/log-formats.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/help_topics/en/rules.txt

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/netrc_credential_store

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/plugins/netrc_credential_store/tests

bzrlib/plugins/netrc_credential_store/tests/__init__.py

bzrlib/plugins/netrc_credential_store/tests/test_netrc.py

bzrlib/push.py

bzrlib/python-compat.h

bzrlib/readdir.h

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/packrepository.py

bzrlib/tests/blackbox/test_dump_btree.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/branch_implementations/test_dotted_revno_to_revision_id.py

bzrlib/tests/branch_implementations/test_iter_merge_sorted_revisions.py

bzrlib/tests/branch_implementations/test_revision_id_to_dotted_revno.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/fake_command.py

bzrlib/tests/https_server.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/ssl_certs

bzrlib/tests/ssl_certs/__init__.py

bzrlib/tests/ssl_certs/ca.crt

bzrlib/tests/ssl_certs/ca.key

bzrlib/tests/ssl_certs/create_ssls.py

bzrlib/tests/ssl_certs/server.crt

bzrlib/tests/ssl_certs/server.csr

bzrlib/tests/ssl_certs/server_with_pass.key

bzrlib/tests/ssl_certs/server_without_pass.key

bzrlib/tests/test__chunks_to_lines.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_chunk_writer.py

bzrlib/tests/test_fifo_cache.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_rules.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/tree_implementations/test_iter_search_rules.py

bzrlib/tests/workingtree_implementations/test_get_file_with_stat.py

bzrlib/transport/ftp

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/log.py

contrib/bash/bzrbashprompt.sh

contrib/bzr_ssh_path_limiter

contrib/convert_to_1.9.py

doc/developers/btree_index_prefetch.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/colocated-branches.txt

doc/developers/cycle.txt

doc/developers/lca_tree_merging.txt

doc/developers/overview.txt

doc/developers/ppa.txt

doc/developers/testing.txt

doc/en/user-guide/stacked.txt

doc/news-template.txt

tools/packaging

tools/packaging/build-packages.sh

tools/packaging/lp-upload-release

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/prepare_for_latex.py

tools/rst2pdf.py

tools/win32/build_release.py

tools/win32/run_script.py

files renamed:
bzrlib/tests/per_repository/ => bzrlib/tests/repository_implementations/

bzrlib/transport/ftp/__init__.py => bzrlib/transport/ftp.py

files modified:
.bzrignore

Makefile

NEWS

bzr.ico

bzrlib/__init__.py

bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_patiencediff_c.c

bzrlib/_patiencediff_py.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/api.py

bzrlib/atomicfile.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/directory_service.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fetch.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/hooks.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/smart/branch.py

bzrlib/smart/client.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/testament.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_push.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/commands/test_commit.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_fetch.py

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_add_inventory.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/helpers.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/repository_implementations/test_get_parent_map.py

bzrlib/tests/repository_implementations/test_has_revisions.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_is_write_locked.py

bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/repository_implementations/test_statistics.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_config.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_index.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_add.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_move.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/timestamp.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/ca_bundle.py

bzrlib/transport/http/response.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/upgrade.py

bzrlib/urlutils.py

bzrlib/util/bencode.py

bzrlib/util/configobj/configobj.py

bzrlib/util/tests/test_bencode.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml7.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/api-versioning.txt

doc/developers/authentication-ring.txt

doc/developers/development-repo.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/plugin-api.txt

doc/developers/releasing.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/adv_merging.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/index.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/organizing_branches.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/reusing_a_checkout.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/solo_intro.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_aliases.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/using_gatekeepers.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

profile_imports.py

setup.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/win32/bzr.iss.cog

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

from bisect import bisect_right

from cStringIO import StringIO

import re

import sys

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

_newline_null_re = re.compile('[\n\0]')

def _has_key_from_parent_map(self, key):

"""Check if this index has one key.

If it's possible to check for multiple keys at once through

calling get_parent_map that should be faster.

"""

return (key in self.get_parent_map([key]))

def _missing_keys_from_parent_map(self, keys):

return set(keys) - set(self.get_parent_map(keys))

class GraphIndexBuilder(object):

"""A builder that can build a GraphIndex.

"""

self.reference_lists = reference_lists

self._keys = set()

# A dict of {key: (absent, ref_lists, value)}

self._nodes = {}

self._nodes_by_key = None

self._nodes_by_key = {}

100

self._key_length = key_elements

101

self._optimize_for_size = False

102

103

def _check_key(self, key):

104

"""Raise BadIndexKey if key is not a valid key for this index."""

110

if not element or _whitespace_re.search(element) is not None:

111

raise errors.BadIndexKey(element)

112

113

def _external_references(self):

114

"""Return references that are not present in this index.

115

"""

116

keys = set()

117

refs = set()

118

# TODO: JAM 2008-11-21 This makes an assumption about how the reference

119

# lists are used. It is currently correct for pack-0.92 through

120

# 1.9, which use the node references (3rd column) second

121

# reference list as the compression parent. Perhaps this should

122

# be moved into something higher up the stack, since it

123

# makes assumptions about how the index is used.

124

if self.reference_lists > 1:

125

for node in self.iter_all_entries():

126

keys.add(node[1])

127

refs.update(node[3][1])

128

return refs - keys

129

else:

130

# If reference_lists == 0 there can be no external references, and

131

# if reference_lists == 1, then there isn't a place to store the

132

# compression parent

133

return set()

134

135

def _get_nodes_by_key(self):

136

if self._nodes_by_key is None:

137

nodes_by_key = {}

138

if self.reference_lists:

139

for key, (absent, references, value) in self._nodes.iteritems():

140

if absent:

141

continue

142

key_dict = nodes_by_key

143

for subkey in key[:-1]:

144

key_dict = key_dict.setdefault(subkey, {})

145

key_dict[key[-1]] = key, value, references

146

else:

147

for key, (absent, references, value) in self._nodes.iteritems():

148

if absent:

149

continue

150

key_dict = nodes_by_key

151

for subkey in key[:-1]:

152

key_dict = key_dict.setdefault(subkey, {})

153

key_dict[key[-1]] = key, value

154

self._nodes_by_key = nodes_by_key

155

return self._nodes_by_key

156

157

def _update_nodes_by_key(self, key, value, node_refs):

158

"""Update the _nodes_by_key dict with a new key.

159

160

For a key of (foo, bar, baz) create

161

_nodes_by_key[foo][bar][baz] = key_value

162

"""

163

if self._nodes_by_key is None:

164

return

165

key_dict = self._nodes_by_key

166

if self.reference_lists:

167

key_value = key, value, node_refs

168

else:

169

key_value = key, value

170

for subkey in key[:-1]:

171

key_dict = key_dict.setdefault(subkey, {})

172

key_dict[key[-1]] = key_value

173

174

def _check_key_ref_value(self, key, references, value):

175

"""Check that 'key' and 'references' are all valid.

176

177

:param key: A key tuple. Must conform to the key interface (be a tuple,

178

be of the right length, not have any whitespace or nulls in any key

179

element.)

180

:param references: An iterable of reference lists. Something like

181

[[(ref, key)], [(ref, key), (other, key)]]

182

:param value: The value associated with this key. Must not contain

183

newlines or null characters.

184

:return: (node_refs, absent_references)

185

node_refs basically a packed form of 'references' where all

186

iterables are tuples

187

absent_references reference keys that are not in self._nodes.

188

This may contain duplicates if the same key is

189

referenced in multiple lists.

def add_node(self, key, value, references=()):

"""Add a node to the index.

100

:param key: The key. keys are non-empty tuples containing

101

as many whitespace-free utf8 bytestrings as the key length

102

defined for this index.

103

:param references: An iterable of iterables of keys. Each is a

104

reference to another key.

105

:param value: The value to associate with the key. It may be any

106

bytes as long as it does not contain \0 or \n.

190

107

"""

191

108

self._check_key(key)

192

109

if _newline_null_re.search(value) is not None:

194

111

if len(references) != self.reference_lists:

195

112

raise errors.BadIndexValue(references)

196

113

node_refs = []

197

absent_references = []

198

114

for reference_list in references:

199

115

for reference in reference_list:

200

# If reference *is* in self._nodes, then we know it has already

201

# been checked.

116

self._check_key(reference)

202

117

if reference not in self._nodes:

203

self._check_key(reference)

204

absent_references.append(reference)

118

self._nodes[reference] = ('a', (), '')

205

119

node_refs.append(tuple(reference_list))

206

return tuple(node_refs), absent_references

207

208

def add_node(self, key, value, references=()):

209

"""Add a node to the index.

210

211

:param key: The key. keys are non-empty tuples containing

212

as many whitespace-free utf8 bytestrings as the key length

213

defined for this index.

214

:param references: An iterable of iterables of keys. Each is a

215

reference to another key.

216

:param value: The value to associate with the key. It may be any

217

bytes as long as it does not contain \0 or \n.

218

"""

219

(node_refs,

220

absent_references) = self._check_key_ref_value(key, references, value)

221

if key in self._nodes and self._nodes[key][0] != 'a':

120

if key in self._nodes and self._nodes[key][0] == '':

222

121

raise errors.BadIndexDuplicateKey(key, self)

223

for reference in absent_references:

224

# There may be duplicates, but I don't think it is worth worrying

225

# about

226

self._nodes[reference] = ('a', (), '')

227

self._nodes[key] = ('', node_refs, value)

122

self._nodes[key] = ('', tuple(node_refs), value)

228

123

self._keys.add(key)

229

if self._nodes_by_key is not None and self._key_length > 1:

230

self._update_nodes_by_key(key, value, node_refs)

124

if self._key_length > 1:

125

key_dict = self._nodes_by_key

126

if self.reference_lists:

127

key_value = key, value, tuple(node_refs)

128

else:

129

key_value = key, value

130

# possibly should do this on-demand, but it seems likely it is

131

# always wanted

132

# For a key of (foo, bar, baz) create

133

# _nodes_by_key[foo][bar][baz] = key_value

134

for subkey in key[:-1]:

135

key_dict = key_dict.setdefault(subkey, {})

136

key_dict[key[-1]] = key_value

231

137

232

138

def finish(self):

233

139

lines = [_SIGNATURE]

236

142

lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')

237

143

prefix_length = sum(len(x) for x in lines)

238

144

# references are byte offsets. To avoid having to do nasty

239

# polynomial work to resolve offsets (references to later in the

145

# polynomial work to resolve offsets (references to later in the

240

146

# file cannot be determined until all the inbetween references have

241

147

# been calculated too) we pad the offsets with 0's to make them be

242

148

# of consistent length. Using binary offsets would break the trivial

315

221

(len(result.getvalue()), expected_bytes))

316

222

return result

317

223

318

def set_optimize(self, for_size=True):

319

"""Change how the builder tries to optimize the result.

320

321

:param for_size: Tell the builder to try and make the index as small as

322

possible.

323

:return: None

324

"""

325

# GraphIndexBuilder itself doesn't pay attention to the flag yet, but

326

# other builders do.

327

self._optimize_for_size = for_size

328

329

224

330

225

class GraphIndex(object):

331

226

"""An index for data with embedded graphs.

377

272

self._keys_by_offset = None

378

273

self._nodes_by_key = None

379

274

self._size = size

380

# The number of bytes we've read so far in trying to process this file

381

self._bytes_read = 0

382

275

383

276

def __eq__(self, other):

384

277

"""Equal when self and other were created with the same parameters."""

391

284

def __ne__(self, other):

392

285

return not self.__eq__(other)

393

286

394

def __repr__(self):

395

return "%s(%r)" % (self.__class__.__name__,

396

self._transport.abspath(self._name))

397

398

def _buffer_all(self, stream=None):

287

def _buffer_all(self):

399

288

"""Buffer all the index data.

400

289

401

290

Mutates self._nodes and self._keys_by_offset.

402

291

"""

403

if self._nodes is not None:

404

# We already did this

405

return

406

292

if 'index' in debug.debug_flags:

407

293

mutter('Reading entire index %s', self._transport.abspath(self._name))

408

if stream is None:

409

stream = self._transport.get(self._name)

294

stream = self._transport.get(self._name)

410

295

self._read_prefix(stream)

411

296

self._expected_elements = 3 + self._key_length

412

297

line_count = 0

414

299

self._keys_by_offset = {}

415

300

# ready-to-return key:value or key:value, node_ref_lists

416

301

self._nodes = {}

417

self._nodes_by_key = None

302

self._nodes_by_key = {}

418

303

trailers = 0

419

304

pos = stream.tell()

420

305

lines = stream.read().split('\n')

429

314

else:

430

315

node_value = value

431

316

self._nodes[key] = node_value

317

if self._key_length > 1:

318

subkey = list(reversed(key[:-1]))

319

key_dict = self._nodes_by_key

320

if self.node_ref_lists:

321

key_value = key, node_value[0], node_value[1]

322

else:

323

key_value = key, node_value

324

# possibly should do this on-demand, but it seems likely it is

325

# always wanted

326

# For a key of (foo, bar, baz) create

327

# _nodes_by_key[foo][bar][baz] = key_value

328

for subkey in key[:-1]:

329

key_dict = key_dict.setdefault(subkey, {})

330

key_dict[key[-1]] = key_value

432

331

# cache the keys for quick set intersections

433

332

self._keys = set(self._nodes)

434

333

if trailers != 1:

435

334

# there must be one line - the empty trailer line.

436

335

raise errors.BadIndexData(self)

437

336

438

def _get_nodes_by_key(self):

439

if self._nodes_by_key is None:

440

nodes_by_key = {}

441

if self.node_ref_lists:

442

for key, (value, references) in self._nodes.iteritems():

443

key_dict = nodes_by_key

444

for subkey in key[:-1]:

445

key_dict = key_dict.setdefault(subkey, {})

446

key_dict[key[-1]] = key, value, references

447

else:

448

for key, value in self._nodes.iteritems():

449

key_dict = nodes_by_key

450

for subkey in key[:-1]:

451

key_dict = key_dict.setdefault(subkey, {})

452

key_dict[key[-1]] = key, value

453

self._nodes_by_key = nodes_by_key

454

return self._nodes_by_key

455

456

337

def iter_all_entries(self):

457

338

"""Iterate over all keys within the index.

458

339

583

464

keys supplied. No additional keys will be returned, and every

584

465

key supplied that is in the index will be returned.

585

466

"""

467

# PERFORMANCE TODO: parse and bisect all remaining data at some

468

# threshold of total-index processing/get calling layers that expect to

469

# read the entire index to use the iter_all_entries method instead.

586

470

keys = set(keys)

587

471

if not keys:

588

472

return []

589

473

if self._size is None and self._nodes is None:

590

474

self._buffer_all()

591

592

# We fit about 20 keys per minimum-read (4K), so if we are looking for

593

# more than 1/20th of the index it's likely (assuming homogeneous key

594

# spread) that we'll read the entire index. If we're going to do that,

595

# buffer the whole thing. A better analysis might take key spread into

596

# account - but B+Tree indices are better anyway.

597

# We could look at all data read, and use a threshold there, which will

598

# trigger on ancestry walks, but that is not yet fully mapped out.

599

if self._nodes is None and len(keys) * 20 > self.key_count():

600

self._buffer_all()

601

475

if self._nodes is not None:

602

476

return self._iter_entries_from_total_buffer(keys)

603

477

else:

645

519

else:

646

520

yield self, key, self._nodes[key]

647

521

return

648

nodes_by_key = self._get_nodes_by_key()

649

522

for key in keys:

650

523

# sanity check

651

524

if key[0] is None:

653

526

if len(key) != self._key_length:

654

527

raise errors.BadIndexKey(key)

655

528

# find what it refers to:

656

key_dict = nodes_by_key

529

key_dict = self._nodes_by_key

657

530

elements = list(key)

658

531

# find the subdict whose contents should be returned.

659

532

try:

746

619

if self._bisect_nodes is None:

747

620

readv_ranges.append(_HEADER_READV)

748

621

self._read_and_parse(readv_ranges)

749

result = []

750

if self._nodes is not None:

751

# _read_and_parse triggered a _buffer_all because we requested the

752

# whole data range

753

for location, key in location_keys:

754

if key not in self._nodes: # not present

755

result.append(((location, key), False))

756

elif self.node_ref_lists:

757

value, refs = self._nodes[key]

758

result.append(((location, key),

759

(self, key, value, refs)))

760

else:

761

result.append(((location, key),

762

(self, key, self._nodes[key])))

763

return result

764

622

# generate results:

765

623

# - figure out <, >, missing, present

766

624

# - result present references so we can return them.

625

result = []

767

626

# keys that we cannot answer until we resolve references

768

627

pending_references = []

769

628

pending_locations = set()

819

678

if length > 0:

820

679

readv_ranges.append((location, length))

821

680

self._read_and_parse(readv_ranges)

822

if self._nodes is not None:

823

# The _read_and_parse triggered a _buffer_all, grab the data and

824

# return it

825

for location, key in pending_references:

826

value, refs = self._nodes[key]

827

result.append(((location, key), (self, key, value, refs)))

828

return result

829

681

for location, key in pending_references:

830

682

# answer key references we had to look-up-late.

683

index = self._parsed_key_index(key)

831

684

value, refs = self._bisect_nodes[key]

832

685

result.append(((location, key), (self, key,

833

686

value, self._resolve_references(refs))))

1024

877

elements = line.split('\0')

1025

878

if len(elements) != self._expected_elements:

1026

879

raise errors.BadIndexData(self)

1027

# keys are tuples. Each element is a string that may occur many

1028

# times, so we intern them to save space. AB, RC, 200807

1029

key = tuple([intern(element) for element in elements[:self._key_length]])

880

# keys are tuples

881

key = tuple(elements[:self._key_length])

1030

882

if first_key is None:

1031

883

first_key = key

1032

884

absent, references, value = elements[-3:]

1103

955

1104

956

:param readv_ranges: A prepared readv range list.

1105

957

"""

1106

if not readv_ranges:

1107

return

1108

if self._nodes is None and self._bytes_read * 2 >= self._size:

1109

# We've already read more than 50% of the file and we are about to

1110

# request more data, just _buffer_all() and be done

1111

self._buffer_all()

1112

return

1113

1114

readv_data = self._transport.readv(self._name, readv_ranges, True,

1115

self._size)

1116

# parse

1117

for offset, data in readv_data:

1118

self._bytes_read += len(data)

1119

if offset == 0 and len(data) == self._size:

1120

# We read the whole range, most likely because the

1121

# Transport upcast our readv ranges into one long request

1122

# for enough total data to grab the whole index.

1123

self._buffer_all(StringIO(data))

1124

return

1125

if self._bisect_nodes is None:

1126

# this must be the start

1127

if not (offset == 0):

1128

raise AssertionError()

1129

offset, data = self._parse_header_from_bytes(data)

1130

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

1131

self._parse_region(offset, data)

958

if readv_ranges:

959

readv_data = self._transport.readv(self._name, readv_ranges, True,

960

self._size)

961

# parse

962

for offset, data in readv_data:

963

if self._bisect_nodes is None:

964

# this must be the start

965

if not (offset == 0):

966

raise AssertionError()

967

offset, data = self._parse_header_from_bytes(data)

968

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

969

self._parse_region(offset, data)

1132

970

1133

971

def _signature(self):

1134

972

"""The file signature for this index type."""

1154

992

in the index list.

1155

993

"""

1156

994

1157

def __init__(self, indices, reload_func=None):

995

def __init__(self, indices):

1158

996

"""Create a CombinedGraphIndex backed by indices.

1159

997

1160

998

:param indices: An ordered list of indices to query for data.

1161

:param reload_func: A function to call if we find we are missing an

1162

index. Should have the form reload_func() => True/False to indicate

1163

if reloading actually changed anything.

1164

999

"""

1165

1000

self._indices = indices

1166

self._reload_func = reload_func

1167

1001

1168

1002

def __repr__(self):

1169

1003

return "%s(%s)" % (

1202

1036

found_parents[key] = parents

1203

1037

return found_parents

1204

1038

1205

has_key = _has_key_from_parent_map

1206

1207

1039

def insert_index(self, pos, index):

1208

1040

"""Insert a new index in the list of indices to query.

1209

1041

1223

1055

the most efficient order for the index.

1224

1056

"""

1225

1057

seen_keys = set()

1226

while True:

1227

try:

1228

for index in self._indices:

1229

for node in index.iter_all_entries():

1230

if node[1] not in seen_keys:

1231

yield node

1232

seen_keys.add(node[1])

1233

return

1234

except errors.NoSuchFile:

1235

self._reload_or_raise()

1058

for index in self._indices:

1059

for node in index.iter_all_entries():

1060

if node[1] not in seen_keys:

1061

yield node

1062

seen_keys.add(node[1])

1236

1063

1237

1064

def iter_entries(self, keys):

1238

1065

"""Iterate over keys within the index.

1246

1073

efficient order for the index.

1247

1074

"""

1248

1075

keys = set(keys)

1249

while True:

1250

try:

1251

for index in self._indices:

1252

if not keys:

1253

return

1254

for node in index.iter_entries(keys):

1255

keys.remove(node[1])

1256

yield node

1076

for index in self._indices:

1077

if not keys:

1257

1078

return

1258

except errors.NoSuchFile:

1259

self._reload_or_raise()

1079

for node in index.iter_entries(keys):

1080

keys.remove(node[1])

1081

yield node

1260

1082

1261

1083

def iter_entries_prefix(self, keys):

1262

1084

"""Iterate over keys within the index using prefix matching.

1282

1104

if not keys:

1283

1105

return

1284

1106

seen_keys = set()

1285

while True:

1286

try:

1287

for index in self._indices:

1288

for node in index.iter_entries_prefix(keys):

1289

if node[1] in seen_keys:

1290

continue

1291

seen_keys.add(node[1])

1292

yield node

1293

return

1294

except errors.NoSuchFile:

1295

self._reload_or_raise()

1107

for index in self._indices:

1108

for node in index.iter_entries_prefix(keys):

1109

if node[1] in seen_keys:

1110

continue

1111

seen_keys.add(node[1])

1112

yield node

1296

1113

1297

1114

def key_count(self):

1298

1115

"""Return an estimate of the number of keys in this index.

1299

1116

1300

1117

For CombinedGraphIndex this is approximated by the sum of the keys of

1301

1118

the child indices. As child indices may have duplicate keys this can

1302

1119

have a maximum error of the number of child indices * largest number of

1303

1120

keys in any index.

1304

1121

"""

1305

while True:

1306

try:

1307

return sum((index.key_count() for index in self._indices), 0)

1308

except errors.NoSuchFile:

1309

self._reload_or_raise()

1310

1311

missing_keys = _missing_keys_from_parent_map

1312

1313

def _reload_or_raise(self):

1314

"""We just got a NoSuchFile exception.

1315

1316

Try to reload the indices, if it fails, just raise the current

1317

exception.

1318

"""

1319

if self._reload_func is None:

1320

raise

1321

exc_type, exc_value, exc_traceback = sys.exc_info()

1322

trace.mutter('Trying to reload after getting exception: %s',

1323

exc_value)

1324

if not self._reload_func():

1325

# We tried to reload, but nothing changed, so we fail anyway

1326

trace.mutter('_reload_func indicated nothing has changed.'

1327

' Raising original exception.')

1328

raise exc_type, exc_value, exc_traceback

1122

return sum((index.key_count() for index in self._indices), 0)

1329

1123

1330

1124

def validate(self):

1331

1125

"""Validate that everything in the index can be accessed."""

1332

while True:

1333

try:

1334

for index in self._indices:

1335

index.validate()

1336

return

1337

except errors.NoSuchFile:

1338

self._reload_or_raise()

1126

for index in self._indices:

1127

index.validate()

1339

1128

1340

1129

1341

1130

class InMemoryGraphIndex(GraphIndexBuilder):

1434

1223

else:

1435

1224

yield self, key, node[2]

1436

1225

return

1437

nodes_by_key = self._get_nodes_by_key()

1438

1226

for key in keys:

1439

1227

# sanity check

1440

1228

if key[0] is None:

1442

1230

if len(key) != self._key_length:

1443

1231

raise errors.BadIndexKey(key)

1444

1232

# find what it refers to:

1445

key_dict = nodes_by_key

1233

key_dict = self._nodes_by_key

1446

1234

elements = list(key)

1447

1235

# find the subdict to return

1448

1236

try:

Older »