~bzr-pqm/bzr/bzr.dev : contents of bzrlib/index.py at revision 6379.6.3

~bzr-pqm/bzr/bzr.dev : (revision 6379.6.3)

5752.3.8 by John Arbash Meinel Merge bzr.dev 5764 to resolve release-notes (aka NEWS) conflicts	1	# Copyright (C) 2007-2011 Canonical Ltd
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	16
"""Indexing facilities."""

# The docstring has to be the very first statement of the module: when it is
# placed after the __future__ import it is only a bare string expression and
# the module's __doc__ stays None.
from __future__ import absolute_import
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	20
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	21	__all__ = [
	22	'CombinedGraphIndex',
	23	'GraphIndex',
	24	'GraphIndexBuilder',
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	25	'GraphIndexPrefixAdapter',
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	26	'InMemoryGraphIndex',
	27	]
2592.1.32 by Robert Collins Add __all__ to index.	28
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	29	from bisect import bisect_right
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	30	from cStringIO import StringIO
2592.1.12 by Robert Collins Handle basic node adds.	31	import re
3789.1.3 by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count().	32	import sys
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	33
2624.2.15 by Robert Collins Add useful -Dindex flag.	34	from bzrlib.lazy_import import lazy_import
2624.2.15 by Robert Collins Add useful -Dindex flag.	35	lazy_import(globals(), """
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	36	from bzrlib import (
	37	bisect_multi,
	38	revision as _mod_revision,
	39	trace,
	40	)
2624.2.15 by Robert Collins Add useful -Dindex flag.	41	""")
3099.3.3 by John Arbash Meinel Deprecate get_parents() in favor of get_parent_map()	42	from bzrlib import (
	43	debug,
	44	errors,
	45	)
4679.8.3 by John Arbash Meinel Expose bzrlib.static_tuple.StaticTuple as a thunk	46	from bzrlib.static_tuple import StaticTuple
2592.1.4 by Robert Collins Create a GraphIndexBuilder.	47
# NOTE(review): presumably the (offset, length) request used to fetch just the
# header of an index file - not referenced in this part of the file, confirm
# against its users.
_HEADER_READV = (0, 200)
# Prefixes of the option lines that GraphIndexBuilder.finish() writes directly
# after the signature line; the decimal value follows the '='.
_OPTION_KEY_ELEMENTS = "key_elements="
_OPTION_LEN = "len="
_OPTION_NODE_REFS = "node_ref_lists="
# First line of every serialised index.
_SIGNATURE = "Bazaar Graph Index 1\n"


# Matches a single tab, newline, vertical tab, form feed, carriage return, NUL
# or space. GraphIndexBuilder._check_key() uses it to reject key elements.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
# Matches a newline or NUL. GraphIndexBuilder._check_key_ref_value() uses it to
# reject values, since both bytes are separators in the serialised form.
_newline_null_re = re.compile('[\n\0]')
	57
	58
3830.3.12 by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks	59	def _has_key_from_parent_map(self, key):
	60	"""Check if this index has one key.
	61
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	62	If it's possible to check for multiple keys at once through
3830.3.12 by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks	63	calling get_parent_map that should be faster.
	64	"""
	65	return (key in self.get_parent_map([key]))
	66
3830.3.20 by John Arbash Meinel Minor PEP8 and copyright updates.	67
3830.3.12 by Martin Pool Review cleanups: unify has_key impls, add missing_keys(), clean up exception blocks	68	def _missing_keys_from_parent_map(self, keys):
	69	return set(keys) - set(self.get_parent_map(keys))
	70
	71
class GraphIndexBuilder(object):
    """Collect nodes in memory and serialise them as a GraphIndex.

    Nodes are registered with add_node(); finish() turns everything that was
    registered into the serialised form, which has the structure::

      _SIGNATURE OPTIONS NODES NEWLINE
      _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
      OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
                        'key_elements=' DIGITS NEWLINE
                        'len=' DIGITS NEWLINE
      NODES          := NODE*
      NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
      KEY            := Not-whitespace-utf8 (NULL Not-whitespace-utf8)*
      ABSENT         := 'a'
      REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
      REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
      REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                                ; referenced key.
      VALUE          := no-newline-no-null-bytes
    """

    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: How many reference lists every node carries.
        :param key_elements: How many bytestrings make up each key.
        """
        self.reference_lists = reference_lists
        self._key_length = key_elements
        # Maps key -> (absent, ref_lists, value).
        self._nodes = {}
        # Keys that something refers to but that have not been added
        # themselves.
        self._absent_keys = set()
        # Nested-dict view of the nodes, only built once it is asked for.
        self._nodes_by_key = None
        self._optimize_for_size = False
        self._combine_backing_indices = True

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if (type(key) not in (tuple, StaticTuple)
                or len(key) != self._key_length):
            raise errors.BadIndexKey(key)
        for part in key:
            # Every element must be non-empty ...
            if not part:
                raise errors.BadIndexKey(part)
            # ... and free of whitespace and NUL bytes.
            if _whitespace_re.search(part) is not None:
                raise errors.BadIndexKey(part)

    def _external_references(self):
        """Return references that are not present in this index.

        Only the second reference list of each node is examined.
        """
        # TODO: JAM 2008-11-21 This makes an assumption about how the
        #       reference lists are used. It is currently correct for
        #       pack-0.92 through 1.9, which use the node references (3rd
        #       column) second reference list as the compression parent.
        #       Perhaps this should be moved into something higher up the
        #       stack, since it makes assumptions about how the index is used.
        if self.reference_lists <= 1:
            # With no reference lists there can be no external references,
            # and with a single one there is no place to store the
            # compression parent.
            return set()
        present = set()
        referenced = set()
        for entry in self.iter_all_entries():
            present.add(entry[1])
            referenced.update(entry[3][1])
        return referenced - present

    def _get_nodes_by_key(self):
        """Return the nested-dict view of the nodes, building it if needed.

        For a key (foo, bar, baz) the entry lives at result[foo][bar][baz]
        and is (key, value, references) when the index has reference lists,
        (key, value) otherwise. Absent nodes are left out.
        """
        if self._nodes_by_key is not None:
            return self._nodes_by_key
        tree = {}
        with_references = bool(self.reference_lists)
        for key, (absent, references, value) in self._nodes.iteritems():
            if absent:
                continue
            leaf = tree
            for subkey in key[:-1]:
                leaf = leaf.setdefault(subkey, {})
            if with_references:
                leaf[key[-1]] = (key, value, references)
            else:
                leaf[key[-1]] = (key, value)
        self._nodes_by_key = tree
        return self._nodes_by_key

    def _update_nodes_by_key(self, key, value, node_refs):
        """Record a new key in the _nodes_by_key dict, if that dict exists.

        For a key of (foo, bar, baz) this stores
        _nodes_by_key[foo][bar][baz] = entry
        """
        if self._nodes_by_key is None:
            # The view has never been requested, so there is nothing to keep
            # in sync.
            return
        if self.reference_lists:
            entry = StaticTuple(key, value, node_refs)
        else:
            entry = StaticTuple(key, value)
        leaf = self._nodes_by_key
        for subkey in key[:-1]:
            leaf = leaf.setdefault(subkey, {})
        leaf[key[-1]] = entry

    def _check_key_ref_value(self, key, references, value):
        """Validate 'key', 'references' and 'value' for a node.

        :param key: A key tuple. Must conform to the key interface (be a
            tuple, be of the right length, not have any whitespace or nulls
            in any key element.)
        :param references: An iterable of reference lists. Something like
            [[(ref, key)], [(ref, key), (other, key)]]
        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              iterables are tuples
            * absent_references: reference keys that are not in self._nodes.
              This may contain duplicates if the same key is referenced in
              multiple lists.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise errors.BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise errors.BadIndexValue(references)
        absent_references = []
        node_refs = []
        for ref_list in references:
            for ref in ref_list:
                # Anything already in self._nodes was validated when it got
                # there.
                if ref in self._nodes:
                    continue
                self._check_key(ref)
                absent_references.append(ref)
            interned = [as_st(ref).intern() for ref in ref_list]
            node_refs.append(as_st(interned))
        return as_st(node_refs), absent_references

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        :raises BadIndexDuplicateKey: if the key was already added as a
            present (non-absent) node.
        """
        checked = self._check_key_ref_value(key, references, value)
        node_refs, absent_references = checked
        existing = self._nodes.get(key)
        if existing is not None and existing[0] != 'a':
            raise errors.BadIndexDuplicateKey(key, self)
        # Referenced keys that are unknown so far get an 'absent' placeholder.
        # Duplicates just overwrite each other, which is harmless.
        for missing in absent_references:
            self._nodes[missing] = ('a', (), '')
        self._absent_keys.update(absent_references)
        # The key being added is present from now on, whatever it was before.
        self._absent_keys.discard(key)
        self._nodes[key] = ('', node_refs, value)
        if self._key_length > 1 and self._nodes_by_key is not None:
            self._update_nodes_by_key(key, value, node_refs)

    def clear_cache(self):
        """See GraphIndex.clear_cache()

        Nothing is cached here; the method only exists so that the builder
        conforms to a generic 'Index' abstraction.
        """
        return None

    def finish(self):
        """Finish the index.

        :returns: cStringIO holding the full content of the index as it
            should be written to disk.
        :raises BzrError: if the serialised length differs from the length
            computed while laying out the reference offsets.
        """
        key_count = len(self._nodes) - len(self._absent_keys)
        lines = [
            _SIGNATURE,
            _OPTION_NODE_REFS + str(self.reference_lists) + '\n',
            _OPTION_KEY_ELEMENTS + str(self._key_length) + '\n',
            _OPTION_LEN + str(key_count) + '\n',
            ]
        prefix_length = sum(len(line) for line in lines)
        # References are stored as byte offsets. A reference to something
        # later in the file cannot be resolved until every reference in
        # between has a known width, so all offsets are zero-padded to one
        # common width instead of doing polynomial work to resolve them.
        # (Binary offsets would break the trivial file parsing.) Finding that
        # width takes three passes:
        #  - gather the size of all non-reference data and count references,
        #  - pick the width and derive the address of every entry,
        #  - serialise.

        # Entries are written sorted by key. In future topological sorting
        # may be considered, at the cost of table scans for direct lookup, or
        # a second index for direct lookup.
        nodes = sorted(self._nodes.items())
        # Without the pre-pass the final length is not known up front.
        expected_bytes = None
        # The pre-pass is only needed when there are reference lists at all.
        if self.reference_lists:
            key_offset_info = []
            non_ref_bytes = prefix_length
            total_references = 0
            # TODO use simple multiplication for the constants in this loop.
            for key, (absent, references, value) in nodes:
                # Remember where this entry starts: the non-reference bytes
                # and the number of references seen before it. This saves
                # reaccumulating on the second pass.
                key_offset_info.append(
                    (key, non_ref_bytes, total_references))
                # The key elements are literal, with a \x00 between them.
                non_ref_bytes += sum(len(element) for element in key)
                if self._key_length > 1:
                    non_ref_bytes += self._key_length - 1
                # The value is literal; add 3 nulls and 1 newline.
                non_ref_bytes += len(value) + 4
                if absent:
                    # One byte for the absent marker; absent entries carry no
                    # references.
                    non_ref_bytes += 1
                    continue
                # (ref_lists - 1) tabs separate the reference lists.
                non_ref_bytes += self.reference_lists - 1
                for ref_list in references:
                    ref_count = len(ref_list)
                    # How many references across the whole file?
                    total_references += ref_count
                    # (refs - 1) carriage returns inside each non-empty list.
                    if ref_count:
                        non_ref_bytes += ref_count - 1
            # How many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references*digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references*digits
            expected_bytes = possible_total_bytes + 1 # terminating newline
            # Resolve the address of every key.
            key_addresses = {}
            for key, plain_bytes, refs_before in key_offset_info:
                key_addresses[key] = plain_bytes + refs_before*digits
            format_string = '%%0%sd' % digits
        # Serialise.
        for key, (absent, references, value) in nodes:
            flattened_references = [
                '\r'.join([format_string % key_addresses[reference]
                           for reference in ref_list])
                for ref_list in references]
            lines.append("%s\x00%s\x00%s\x00%s\n" % (
                '\x00'.join(key), absent,
                '\t'.join(flattened_references), value))
        lines.append('\n')
        result = StringIO(''.join(lines))
        if expected_bytes:
            actual_bytes = len(result.getvalue())
            if actual_bytes != expected_bytes:
                raise errors.BzrError('Failed index creation. Internal error:'
                    ' mismatched output length and expected length: %d %d' %
                    (actual_bytes, expected_bytes))
        return result

    def set_optimize(self, for_size=None, combine_backing_indices=None):
        """Change how the builder tries to optimize the result.

        A parameter that is left as None keeps its current setting.

        :param for_size: Tell the builder to try and make the index as small
            as possible.
        :param combine_backing_indices: If the builder spills to disk to save
            memory, should the on-disk indices be combined. Set to True if
            you are going to be probing the index, but to False if you are
            not. (If you are not querying, then the time spent combining is
            wasted.)
        :return: None
        """
        # GraphIndexBuilder itself doesn't pay attention to these flags yet,
        # but other builders do.
        requested = (
            ('_optimize_for_size', for_size),
            ('_combine_backing_indices', combine_backing_indices),
            )
        for attribute, setting in requested:
            if setting is not None:
                setattr(self, attribute, setting)

    def find_ancestry(self, keys, ref_list_num):
        """See CombinedGraphIndex.find_ancestry()"""
        parent_map = {}
        missing_keys = set()
        pending = set(keys)
        while pending:
            next_pending = set()
            for _index, key, _value, ref_lists in self.iter_entries(pending):
                parent_keys = ref_lists[ref_list_num]
                # Record the key before queueing its parents, so a key that
                # lists itself as a parent is not queued again.
                parent_map[key] = parent_keys
                for parent in parent_keys:
                    if parent not in parent_map:
                        next_pending.add(parent)
            # Whatever was asked for in this round but never showed up is
            # missing.
            missing_keys.update(
                key for key in pending if key not in parent_map)
            pending = next_pending
        return parent_map, missing_keys
	374
2592.1.5 by Robert Collins Trivial index reading.	375
	376	class GraphIndex(object):
	377	"""An index for data with embedded graphs.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	378
2592.1.10 by Robert Collins Make validate detect node reference parsing errors.	379	The index maps keys to a list of key reference lists, and a value.
	380	Each node has the same number of key reference lists. Each key reference
	381	list can be empty or an arbitrary length. The value is an opaque NULL
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	382	terminated string without any newlines. The storage of the index is
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	383	hidden in the interface: keys and key references are always tuples of
	384	bytestrings, never the internal representation (e.g. dictionary offsets).
2592.1.30 by Robert Collins Absent entries are not yeilded.	385
2592.1.30 by Robert Collins Absent entries are not yeilded.	386	It is presumed that the index will not be mutated - it is static data.
2592.1.34 by Robert Collins Cleanup docs.	387
2592.1.44 by Robert Collins Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.	388	Successive iter_all_entries calls will read the entire index each time.
	389	Additionally, iter_entries calls will read the index linearly until the
	390	desired keys are found. XXX: This must be fixed before the index is
2592.1.34 by Robert Collins Cleanup docs.	391	suitable for production use. :XXX
2592.1.5 by Robert Collins Trivial index reading.	392	"""
2592.1.5 by Robert Collins Trivial index reading.	393
5074.4.2 by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.	394	def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
2592.1.5 by Robert Collins Trivial index reading.	395	"""Open an index called name on transport.
	396
	397	:param transport: A bzrlib.transport.Transport.
	398	:param name: A path to provide to transport API calls.
2890.2.1 by Robert Collins * ``bzrlib.index.GraphIndex`` now requires a size parameter to the	399	:param size: The size of the index in bytes. This is used for bisection
	400	logic to perform partial index reads. While the size could be
	401	obtained by statting the file this introduced an additional round
2890.2.8 by Robert Collins Make the size of the index optionally None for the pack-names index.	402	trip as well as requiring stat'able transports, both of which are
	403	avoided by having it supplied. If size is None, then bisection
	404	support will be disabled and accessing the index will just stream
	405	all the data.
5074.4.2 by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.	406	:param offset: Instead of starting the index data at offset 0, start it
	407	at an arbitrary offset.
2592.1.5 by Robert Collins Trivial index reading.	408	"""
	409	self._transport = transport
	410	self._name = name
2890.2.16 by Robert Collins Review feedback.	411	# Becomes a dict of key:(value, reference-list-byte-locations) used by
2890.2.16 by Robert Collins Review feedback.	412	# the bisection interface to store parsed but not resolved keys.
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	413	self._bisect_nodes = None
2890.2.16 by Robert Collins Review feedback.	414	# Becomes a dict of key:(value, reference-list-keys) which are ready to
2890.2.16 by Robert Collins Review feedback.	415	# be returned directly to callers.
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	416	self._nodes = None
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	417	# a sorted list of slice-addresses for the parsed bytes of the file.
	418	# e.g. (0,1) would mean that byte 0 is parsed.
2890.2.2 by Robert Collins Opening an index creates a map for the parsed bytes.	419	self._parsed_byte_map = []
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	420	# a sorted list of keys matching each slice address for parsed bytes
	421	# e.g. (None, 'foo@bar') would mean that the first byte contained no
	422	# key, and the end byte of the slice is the of the data for 'foo@bar'
	423	self._parsed_key_map = []
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	424	self._key_count = None
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	425	self._keys_by_offset = None
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	426	self._nodes_by_key = None
2890.2.1 by Robert Collins * ``bzrlib.index.GraphIndex`` now requires a size parameter to the	427	self._size = size
3665.3.3 by John Arbash Meinel If we read more than 50% of the whole index,	428	# The number of bytes we've read so far in trying to process this file
	429	self._bytes_read = 0
5074.4.2 by John Arbash Meinel Add 'offset=' to the GraphIndex api, but refuse to let it be nonzero for now.	430	self._base_offset = offset
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	431
2592.3.176 by Robert Collins Various pack refactorings.	432	def __eq__(self, other):
2592.3.215 by Robert Collins Review feedback.	433	"""Equal when self and other were created with the same parameters."""
2592.3.176 by Robert Collins Various pack refactorings.	434	return (
	435	type(self) == type(other) and
	436	self._transport == other._transport and
	437	self._name == other._name and
	438	self._size == other._size)
	439
	440	def __ne__(self, other):
	441	return not self.__eq__(other)
	442
3517.4.13 by Martin Pool Add repr methods	443	def __repr__(self):
	444	return "%s(%r)" % (self.__class__.__name__,
	445	self._transport.abspath(self._name))
	446
3665.3.1 by John Arbash Meinel Updates to GraphIndex processing.	447	def _buffer_all(self, stream=None):
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	448	"""Buffer all the index data.
	449
	450	Mutates self._nodes and self.keys_by_offset.
2592.1.5 by Robert Collins Trivial index reading.	451	"""
3665.3.1 by John Arbash Meinel Updates to GraphIndex processing.	452	if self._nodes is not None:
	453	# We already did this
	454	return
2624.2.15 by Robert Collins Add useful -Dindex flag.	455	if 'index' in debug.debug_flags:
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	456	trace.mutter('Reading entire index %s',
	457	self._transport.abspath(self._name))
3665.3.1 by John Arbash Meinel Updates to GraphIndex processing.	458	if stream is None:
	459	stream = self._transport.get(self._name)
5074.4.3 by John Arbash Meinel Actually implement offset support for GraphIndex.	460	if self._base_offset != 0:
	461	# This is wasteful, but it is better than dealing with
	462	# adjusting all the offsets, etc.
	463	stream = StringIO(stream.read()[self._base_offset:])
2592.1.27 by Robert Collins Test missing end lines with non-empty indices.	464	self._read_prefix(stream)
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	465	self._expected_elements = 3 + self._key_length
2592.1.27 by Robert Collins Test missing end lines with non-empty indices.	466	line_count = 0
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	467	# raw data keyed by offset
	468	self._keys_by_offset = {}
	469	# ready-to-return key:value or key:value, node_ref_lists
	470	self._nodes = {}
3711.3.13 by John Arbash Meinel Shave off another 5s by not building 'node_by_key'	471	self._nodes_by_key = None
2592.1.27 by Robert Collins Test missing end lines with non-empty indices.	472	trailers = 0
	473	pos = stream.tell()
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	474	lines = stream.read().split('\n')
4708.2.1 by Martin Ensure all files opened by bazaar proper are explicitly closed	475	# GZ 2009-09-20: Should really use a try/finally block to ensure close
	476	stream.close()
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	477	del lines[-1]
	478	_, _, _, trailers = self._parse_lines(lines, pos)
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	479	for key, absent, references, value in self._keys_by_offset.itervalues():
2592.1.30 by Robert Collins Absent entries are not yeilded.	480	if absent:
2592.1.30 by Robert Collins Absent entries are not yeilded.	481	continue
2592.1.28 by Robert Collins Basic two pass iter_all_entries.	482	# resolve references:
	483	if self.node_ref_lists:
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	484	node_value = (value, self._resolve_references(references))
2592.1.28 by Robert Collins Basic two pass iter_all_entries.	485	else:
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	486	node_value = value
	487	self._nodes[key] = node_value
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	488	# cache the keys for quick set intersections
2592.1.27 by Robert Collins Test missing end lines with non-empty indices.	489	if trailers != 1:
	490	# there must be one line - the empty trailer line.
	491	raise errors.BadIndexData(self)
	492
def clear_cache(self):
    """Drop any cached/memoized values.

    May be called at any time; the usual caller has extracted what it
    needs and does not expect to ask this index for anything more.  This
    implementation has no body beyond this docstring, so it is a no-op.
    """
    return None
	500
def external_references(self, ref_list_num):
    """Return the referenced keys that are not nodes of this index.

    The whole index is buffered first.

    :param ref_list_num: Which of each node's reference lists to inspect.
    :return: A set of the keys found in that reference list of any node
        which are not themselves present in this index.
    :raises ValueError: If ref_list_num + 1 exceeds the number of
        reference lists the index has.
    """
    self._buffer_all()
    ref_list_count = self.node_ref_lists
    if ref_list_num + 1 > ref_list_count:
        raise ValueError('No ref list %d, index has %d ref lists'
            % (ref_list_num, ref_list_count))
    nodes = self._nodes
    external = set()
    for value, ref_lists in nodes.itervalues():
        for ref in ref_lists[ref_list_num]:
            if ref not in nodes:
                external.add(ref)
    return external
4011.5.2 by Andrew Bennetts Add more tests, improve existing tests, add GraphIndex._external_references()	514
3711.3.21 by John Arbash Meinel Fix GraphIndex to properly generate _nodes_by_keys on demand.	515	def _get_nodes_by_key(self):
	516	if self._nodes_by_key is None:
	517	nodes_by_key = {}
	518	if self.node_ref_lists:
	519	for key, (value, references) in self._nodes.iteritems():
	520	key_dict = nodes_by_key
	521	for subkey in key[:-1]:
	522	key_dict = key_dict.setdefault(subkey, {})
	523	key_dict[key[-1]] = key, value, references
	524	else:
	525	for key, value in self._nodes.iteritems():
	526	key_dict = nodes_by_key
	527	for subkey in key[:-1]:
	528	key_dict = key_dict.setdefault(subkey, {})
	529	key_dict[key[-1]] = key, value
	530	self._nodes_by_key = nodes_by_key
	531	return self._nodes_by_key
	532
def iter_all_entries(self):
    """Iterate over every key held in the index.

    The whole index is buffered first if that has not happened yet.

    :return: An iterable of (index, key, value, reference_lists) when the
        index has reference lists, otherwise of (index, key, value), which
        keeps the API compatible with simple key:value index types.
        There is no defined order for the result iteration - it will be in
        the most efficient order for the index.
    """
    if 'evil' in debug.debug_flags:
        trace.mutter_callsite(3,
            "iter_all_entries scales with size of history.")
    if self._nodes is None:
        self._buffer_all()
    if not self.node_ref_lists:
        for key, value in self._nodes.iteritems():
            yield self, key, value
        return
    for key, node in self._nodes.iteritems():
        value, node_ref_lists = node
        yield self, key, value, node_ref_lists
2624.2.2 by Robert Collins Temporary performance hack for GraphIndex : load the entire index once and only once into ram.	553
def _read_prefix(self, stream):
    """Read and validate the index header from stream.

    Consumes the signature and three option lines, storing the parsed
    integers in self.node_ref_lists, self._key_length and self._key_count
    (in that order).

    :param stream: A file-like object positioned at the start of the index.
    :raises errors.BadIndexFormatSignature: If the signature differs from
        self._signature().
    :raises errors.BadIndexOptions: If an option line lacks its expected
        prefix or its value is not an integer.
    """
    expected_signature = self._signature()
    found_signature = stream.read(len(expected_signature))
    if found_signature != expected_signature:
        raise errors.BadIndexFormatSignature(self._name, GraphIndex)

    def read_int_option(prefix):
        # One option per line: the prefix, an integer, then a final
        # character (the line terminator) which is dropped.
        option_line = stream.readline()
        if not option_line.startswith(prefix):
            raise errors.BadIndexOptions(self)
        try:
            return int(option_line[len(prefix):-1])
        except ValueError:
            raise errors.BadIndexOptions(self)

    self.node_ref_lists = read_int_option(_OPTION_NODE_REFS)
    self._key_length = read_int_option(_OPTION_KEY_ELEMENTS)
    self._key_count = read_int_option(_OPTION_LEN)
2592.1.5 by Robert Collins Trivial index reading.	579
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	580	def _resolve_references(self, references):
2890.2.16 by Robert Collins Review feedback.	581	"""Return the resolved key references for references.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	582
2890.2.16 by Robert Collins Review feedback.	583	References are resolved by looking up the location of the key in the
	584	_keys_by_offset map and substituting the key name, preserving ordering.
	585
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	586	:param references: An iterable of iterables of key locations. e.g.
2890.2.16 by Robert Collins Review feedback.	587	[[123, 456], [123]]
	588	:return: A tuple of tuples of keys.
	589	"""
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	590	node_refs = []
	591	for ref_list in references:
	592	node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
	593	return tuple(node_refs)
	594
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	595	def _find_index(self, range_map, key):
	596	"""Helper for the _parsed_*_index calls.
	597
	598	Given a range map - [(start, end), ...], finds the index of the range
	599	in the map for key if it is in the map, and if it is not there, the
	600	immediately preceeding range in the map.
	601	"""
	602	result = bisect_right(range_map, key) - 1
	603	if result + 1 < len(range_map):
	604	# check the border condition, it may be in result + 1
	605	if range_map[result + 1][0] == key[0]:
	606	return result + 1
	607	return result
	608
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	609	def _parsed_byte_index(self, offset):
	610	"""Return the index of the entry immediately before offset.
	611
	612	e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
	613	there is one unparsed byte (the 11th, addressed as[10]). then:
	614	asking for 0 will return 0
	615	asking for 10 will return 0
	616	asking for 11 will return 1
	617	asking for 12 will return 1
	618	"""
	619	key = (offset, 0)
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	620	return self._find_index(self._parsed_byte_map, key)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	621
	622	def _parsed_key_index(self, key):
	623	"""Return the index of the entry immediately before key.
	624
	625	e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
	626	meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
	627	have been parsed, then:
	628	asking for '' will return 0
	629	asking for 'a' will return 0
	630	asking for 'b' will return 1
	631	asking for 'e' will return 1
	632	"""
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	633	search_key = (key, None)
	634	return self._find_index(self._parsed_key_map, search_key)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	635
	636	def _is_parsed(self, offset):
	637	"""Returns True if offset has been parsed."""
	638	index = self._parsed_byte_index(offset)
	639	if index == len(self._parsed_byte_map):
	640	return offset < self._parsed_byte_map[index - 1][1]
	641	start, end = self._parsed_byte_map[index]
	642	return offset >= start and offset < end
	643
2890.2.7 by Robert Collins * Pack indices are now partially parsed for specific key lookup using a	644	def _iter_entries_from_total_buffer(self, keys):
	645	"""Iterate over keys when the entire index is parsed."""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	646	# Note: See the note in BTreeBuilder.iter_entries for why we don't use
	647	# .intersection() here
	648	nodes = self._nodes
	649	keys = [key for key in keys if key in nodes]
2624.2.3 by Robert Collins Make GraphIndex.iter_entries do hash lookups rather than table scans.	650	if self.node_ref_lists:
	651	for key in keys:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	652	value, node_refs = nodes[key]
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	653	yield self, key, value, node_refs
2624.2.3 by Robert Collins Make GraphIndex.iter_entries do hash lookups rather than table scans.	654	else:
	655	for key in keys:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	656	yield self, key, nodes[key]
2592.1.7 by Robert Collins A validate that goes boom.	657
def iter_entries(self, keys):
    """Iterate over keys within the index.

    :param keys: An iterable providing the keys to be retrieved.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys supplied. No additional keys will be returned, and every
        key supplied that is in the index will be returned.  An empty
        list is returned when no keys are supplied.
    """
    wanted = set(keys)
    if not wanted:
        return []
    unbuffered = self._nodes is None
    if unbuffered and self._size is None:
        # Without a size there is nothing to bisect over.
        self._buffer_all()

    # We fit about 20 keys per minimum-read (4K), so if we are looking for
    # more than 1/20th of the index its likely (assuming homogenous key
    # spread) that we'll read the entire index. If we're going to do that,
    # buffer the whole thing. A better analysis might take key spread into
    # account - but B+Tree indices are better anyway.
    # We could look at all data read, and use a threshold there, which will
    # trigger on ancestry walks, but that is not yet fully mapped out.
    if self._nodes is None and len(wanted) * 20 > self.key_count():
        self._buffer_all()
    if self._nodes is None:
        located = bisect_multi.bisect_multi_bytes(
            self._lookup_keys_via_location, self._size, wanted)
        return (found[1] for found in located)
    return self._iter_entries_from_total_buffer(wanted)
2890.2.7 by Robert Collins * Pack indices are now partially parsed for specific key lookup using a	686
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    WARNING: Note that this method currently causes a full index parse
    unconditionally (which is reasonably appropriate as it is a means for
    thunking many small indices into one larger one and still supplies
    iter_all_entries at the thunk layer).

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.  Prefixes that match nothing yield nothing.
    :raises errors.BadIndexKey: When a prefix has the wrong number of
        elements or its first element is None.  As this is a generator
        the error surfaces during iteration.
    """
    keys = set(keys)
    if not keys:
        return
    # load data - also finds key lengths
    if self._nodes is None:
        self._buffer_all()
    if self._key_length == 1:
        for key in keys:
            # sanity check; length first so that an empty tuple is
            # reported as a bad key rather than failing on key[0].
            if len(key) != self._key_length or key[0] is None:
                raise errors.BadIndexKey(key)
            try:
                node = self._nodes[key]
            except KeyError:
                # a non-existent lookup - skip it, as the multi-element
                # path below does.
                continue
            if self.node_ref_lists:
                value, node_refs = node
                yield self, key, value, node_refs
            else:
                yield self, key, node
        return
    nodes_by_key = self._get_nodes_by_key()
    for key in keys:
        # sanity check
        if len(key) != self._key_length or key[0] is None:
            raise errors.BadIndexKey(key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            # a prefix: walk every dict below the one we stopped at.
            dicts = [key_dict]
            while dicts:
                key_dict = dicts.pop(-1)
                # can't be empty or would not exist
                sample = next(key_dict.itervalues())
                if isinstance(sample, dict):
                    # push keys
                    dicts.extend(key_dict.itervalues())
                else:
                    # yield keys
                    for value in key_dict.itervalues():
                        # each value is the key:value:node refs tuple
                        # ready to yield.
                        yield (self, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (self, ) + key_dict
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	764
4593.4.12 by John Arbash Meinel Name the specific index api _find_ancestors, and the public CombinedGraphIndex api find_ancestry()	765	def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
	766	"""See BTreeIndex._find_ancestors."""
4593.4.7 by John Arbash Meinel Basic implementation of a conforming interface for GraphIndex.	767	# The api can be implemented as a trivial overlay on top of
	768	# iter_entries, it is not an efficient implementation, but it at least
	769	# gets the job done.
	770	found_keys = set()
	771	search_keys = set()
	772	for index, key, value, refs in self.iter_entries(keys):
	773	parent_keys = refs[ref_list_num]
	774	found_keys.add(key)
	775	parent_map[key] = parent_keys
	776	search_keys.update(parent_keys)
	777	# Figure out what, if anything, was missing
	778	missing_keys.update(set(keys).difference(found_keys))
	779	search_keys = search_keys.difference(parent_map)
	780	return search_keys
	781
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For GraphIndex the estimate is exact.  The cached count is returned
    when it is already known; otherwise the header range is read and
    parsed first.
    """
    if self._key_count is not None:
        return self._key_count
    self._read_and_parse([_HEADER_READV])
    return self._key_count
	790
def _lookup_keys_via_location(self, location_keys):
    """Bisection callback: probe the index at byte locations for keys.

    This is the lookup function handed to
    bzrlib.bisect_multi.bisect_multi_bytes by iter_entries.

    If _buffer_all has been called, then all the data for the index is in
    memory, and this method should not be called, as it uses a separate
    cache because it cannot pre-resolve all indices, which buffer_all does
    for performance.

    :param location_keys: A list of location(byte offset), key tuples.
    :return: A list of (location_key, result) tuples as expected by
        bzrlib.bisect_multi.bisect_multi_bytes.  As built below, result
        is False when the key is known to be absent, -1 or +1 when the
        key lies below or above the probed region, and otherwise the
        (index, key, value) or (index, key, value, references) entry.
    """
    # Possible improvements:
    #  - only bisect lookup each key once
    #  - sort the keys first, and use that to reduce the bisection window
    # -----
    # this progresses in three parts:
    # read data
    # parse it
    # attempt to answer the question from the now in memory data.
    # build the readv request
    # for each location, ask for 800 bytes - much more than rows we've seen
    # anywhere.
    readv_ranges = []
    for location, key in location_keys:
        # can we answer from cache?
        if self._bisect_nodes and key in self._bisect_nodes:
            # We have the key parsed.
            continue
        index = self._parsed_key_index(key)
        if (len(self._parsed_key_map) and
            self._parsed_key_map[index][0] <= key and
            (self._parsed_key_map[index][1] >= key or
             # end of the file has been parsed
             self._parsed_byte_map[index][1] == self._size)):
            # the key has been parsed, so no lookup is needed even if its
            # not present.
            continue
        # - if we have examined this part of the file already - yes
        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
            self._parsed_byte_map[index][0] <= location and
            self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            continue
        # clamp the request so that it does not run past the end of the
        # index.
        length = 800
        if location + length > self._size:
            length = self._size - location
        # todo, trim out parsed locations.
        if length > 0:
            readv_ranges.append((location, length))
    # read the header if needed
    if self._bisect_nodes is None:
        readv_ranges.append(_HEADER_READV)
    self._read_and_parse(readv_ranges)
    result = []
    if self._nodes is not None:
        # _read_and_parse triggered a _buffer_all because we requested the
        # whole data range
        for location, key in location_keys:
            if key not in self._nodes: # not present
                result.append(((location, key), False))
            elif self.node_ref_lists:
                value, refs = self._nodes[key]
                result.append(((location, key),
                    (self, key, value, refs)))
            else:
                result.append(((location, key),
                    (self, key, self._nodes[key])))
        return result
    # generate results:
    #  - figure out <, >, missing, present
    #  - result present references so we can return them.
    # keys that we cannot answer until we resolve references
    pending_references = []
    pending_locations = set()
    for location, key in location_keys:
        # can we answer from cache?
        if key in self._bisect_nodes:
            # the key has been parsed, so no lookup is needed
            if self.node_ref_lists:
                # the references may not have been all parsed.
                value, refs = self._bisect_nodes[key]
                wanted_locations = []
                for ref_list in refs:
                    for ref in ref_list:
                        if ref not in self._keys_by_offset:
                            wanted_locations.append(ref)
                if wanted_locations:
                    # defer the answer until the referenced locations
                    # have been read by the second pass below.
                    pending_locations.update(wanted_locations)
                    pending_references.append((location, key))
                    continue
                result.append(((location, key), (self, key,
                    value, self._resolve_references(refs))))
            else:
                result.append(((location, key),
                    (self, key, self._bisect_nodes[key])))
            continue
        else:
            # has the region the key should be in, been parsed?
            # NOTE(review): unlike the first loop this does not guard
            # against an empty _parsed_key_map - presumably the header
            # parse above always leaves an entry; confirm.
            index = self._parsed_key_index(key)
            if (self._parsed_key_map[index][0] <= key and
                (self._parsed_key_map[index][1] >= key or
                 # end of the file has been parsed
                 self._parsed_byte_map[index][1] == self._size)):
                result.append(((location, key), False))
                continue
        # no, is the key above or below the probed location:
        # get the range of the probed & parsed location
        index = self._parsed_byte_index(location)
        # if the key is below the start of the range, its below
        if key < self._parsed_key_map[index][0]:
            direction = -1
        else:
            direction = +1
        result.append(((location, key), direction))
    readv_ranges = []
    # lookup data to resolve references
    for location in pending_locations:
        length = 800
        if location + length > self._size:
            length = self._size - location
        # TODO: trim out parsed locations (e.g. if the 800 is into the
        # parsed region trim it, and dont use the adjust_for_latency
        # facility)
        if length > 0:
            readv_ranges.append((location, length))
    self._read_and_parse(readv_ranges)
    if self._nodes is not None:
        # The _read_and_parse triggered a _buffer_all, grab the data and
        # return it
        for location, key in pending_references:
            value, refs = self._nodes[key]
            result.append(((location, key), (self, key, value, refs)))
        return result
    for location, key in pending_references:
        # answer key references we had to look-up-late.
        value, refs = self._bisect_nodes[key]
        result.append(((location, key), (self, key,
            value, self._resolve_references(refs))))
    return result
	932
	933	def _parse_header_from_bytes(self, bytes):
	934	"""Parse the header from a region of bytes.
	935
	936	:param bytes: The data to parse.
	937	:return: An offset, data tuple such as readv yields, for the unparsed
	938	data. (which may length 0).
	939	"""
	940	signature = bytes[0:len(self._signature())]
	941	if not signature == self._signature():
	942	raise errors.BadIndexFormatSignature(self._name, GraphIndex)
	943	lines = bytes[len(self._signature()):].splitlines()
	944	options_line = lines[0]
	945	if not options_line.startswith(_OPTION_NODE_REFS):
	946	raise errors.BadIndexOptions(self)
	947	try:
	948	self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
	949	except ValueError:
	950	raise errors.BadIndexOptions(self)
	951	options_line = lines[1]
	952	if not options_line.startswith(_OPTION_KEY_ELEMENTS):
	953	raise errors.BadIndexOptions(self)
	954	try:
	955	self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
	956	except ValueError:
	957	raise errors.BadIndexOptions(self)
	958	options_line = lines[2]
	959	if not options_line.startswith(_OPTION_LEN):
	960	raise errors.BadIndexOptions(self)
	961	try:
	962	self._key_count = int(options_line[len(_OPTION_LEN):])
	963	except ValueError:
	964	raise errors.BadIndexOptions(self)
	965	# calculate the bytes we have processed
	966	header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
	967	len(lines[2]) + 3)
	968	self._parsed_bytes(0, None, header_end, None)
	969	# setup parsing state
	970	self._expected_elements = 3 + self._key_length
	971	# raw data keyed by offset
	972	self._keys_by_offset = {}
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	973	# keys with the value and node references
	974	self._bisect_nodes = {}
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	975	return header_end, bytes[header_end:]
	976
	977	def _parse_region(self, offset, data):
	978	"""Parse node data returned from a readv operation.
	979
	980	:param offset: The byte offset the data starts at.
	981	:param data: The data to parse.
	982	"""
	983	# trim the data.
	984	# end first:
	985	end = offset + len(data)
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	986	high_parsed = offset
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	987	while True:
	988	# Trivial test - if the current index's end is within the
	989	# low-matching parsed range, we're done.
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	990	index = self._parsed_byte_index(high_parsed)
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	991	if end < self._parsed_byte_map[index][1]:
	992	return
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	993	# print "[%d:%d]" % (offset, end), \
	994	# self._parsed_byte_map[index:index + 2]
	995	high_parsed, last_segment = self._parse_segment(
	996	offset, data, end, index)
	997	if last_segment:
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	998	return
	999
	1000	def _parse_segment(self, offset, data, end, index):
	1001	"""Parse one segment of data.
	1002
	1003	:param offset: Where 'data' begins in the file.
	1004	:param data: Some data to parse a segment of.
	1005	:param end: Where data ends
	1006	:param index: The current index into the parsed bytes map.
	1007	:return: True if the parsed segment is the last possible one in the
	1008	range of data.
2890.2.15 by Robert Collins Corner case when parsing repeated sections - the bottom section of a region may not be parsed, so we need to manually advance past that.	1009	:return: high_parsed_byte, last_segment.
	1010	high_parsed_byte is the location of the highest parsed byte in this
	1011	segment, last_segment is True if the parsed segment is the last
	1012	possible one in the data block.
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	1013	"""
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1014	# default is to use all data
	1015	trim_end = None
	1016	# accomodate overlap with data before this.
	1017	if offset < self._parsed_byte_map[index][1]:
	1018	# overlaps the lower parsed region
	1019	# skip the parsed data
	1020	trim_start = self._parsed_byte_map[index][1] - offset
	1021	# don't trim the start for \n
	1022	start_adjacent = True
	1023	elif offset == self._parsed_byte_map[index][1]:
	1024	# abuts the lower parsed region
	1025	# use all data
	1026	trim_start = None
	1027	# do not trim anything
	1028	start_adjacent = True
	1029	else:
	1030	# does not overlap the lower parsed region
	1031	# use all data
	1032	trim_start = None
	1033	# but trim the leading \n
	1034	start_adjacent = False
	1035	if end == self._size:
	1036	# lines up to the end of all data:
	1037	# use it all
	1038	trim_end = None
	1039	# do not strip to the last \n
	1040	end_adjacent = True
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	1041	last_segment = True
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1042	elif index + 1 == len(self._parsed_byte_map):
	1043	# at the end of the parsed data
	1044	# use it all
	1045	trim_end = None
	1046	# but strip to the last \n
	1047	end_adjacent = False
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	1048	last_segment = True
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1049	elif end == self._parsed_byte_map[index + 1][0]:
	1050	# buts up against the next parsed region
	1051	# use it all
	1052	trim_end = None
	1053	# do not strip to the last \n
	1054	end_adjacent = True
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	1055	last_segment = True
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1056	elif end > self._parsed_byte_map[index + 1][0]:
	1057	# overlaps into the next parsed region
	1058	# only consider the unparsed data
	1059	trim_end = self._parsed_byte_map[index + 1][0] - offset
	1060	# do not strip to the last \n as we know its an entire record
	1061	end_adjacent = True
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	1062	last_segment = end < self._parsed_byte_map[index + 1][1]
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1063	else:
	1064	# does not overlap into the next region
	1065	# use it all
	1066	trim_end = None
	1067	# but strip to the last \n
	1068	end_adjacent = False
2890.2.14 by Robert Collins Parse more than one segment of data from a single readv response if needed.	1069	last_segment = True
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1070	# now find bytes to discard if needed
	1071	if not start_adjacent:
	1072	# work around python bug in rfind
	1073	if trim_start is None:
	1074	trim_start = data.find('\n') + 1
	1075	else:
	1076	trim_start = data.find('\n', trim_start) + 1
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	1077	if not (trim_start != 0):
	1078	raise AssertionError('no \n was present')
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1079	# print 'removing start', offset, trim_start, repr(data[:trim_start])
	1080	if not end_adjacent:
	1081	# work around python bug in rfind
	1082	if trim_end is None:
	1083	trim_end = data.rfind('\n') + 1
	1084	else:
	1085	trim_end = data.rfind('\n', None, trim_end) + 1
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	1086	if not (trim_end != 0):
	1087	raise AssertionError('no \n was present')
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1088	# print 'removing end', offset, trim_end, repr(data[trim_end:])
	1089	# adjust offset and data to the parseable data.
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1090	trimmed_data = data[trim_start:trim_end]
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	1091	if not (trimmed_data):
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1092	raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	1093	% (trim_start, trim_end, offset, offset + len(data)))
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1094	if trim_start:
	1095	offset += trim_start
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1096	# print "parsing", repr(trimmed_data)
2890.2.10 by Robert Collins Add test coverage to ensure \r's are not mangled by bisection parsing.	1097	# splitlines mangles the \r delimiters.. don't use it.
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1098	lines = trimmed_data.split('\n')
2890.2.9 by Robert Collins Don't use splitlines for index data parsing, we embed \r.	1099	del lines[-1]
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1100	pos = offset
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1101	first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
	1102	for key, value in nodes:
	1103	self._bisect_nodes[key] = value
	1104	self._parsed_bytes(offset, first_key,
	1105	offset + len(trimmed_data), last_key)
	1106	return offset + len(trimmed_data), last_segment
	1107
	1108	def _parse_lines(self, lines, pos):
	1109	key = None
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1110	first_key = None
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1111	trailers = 0
	1112	nodes = []
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1113	for line in lines:
	1114	if line == '':
	1115	# must be at the end
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1116	if self._size:
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	1117	if not (self._size == pos + 1):
	1118	raise AssertionError("%s %s" % (self._size, pos))
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1119	trailers += 1
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1120	continue
	1121	elements = line.split('\0')
	1122	if len(elements) != self._expected_elements:
	1123	raise errors.BadIndexData(self)
3530.3.3 by Robert Collins Credit and explanation for interning.	1124	# keys are tuples. Each element is a string that may occur many
	1125	# times, so we intern them to save space. AB, RC, 200807
3711.3.13 by John Arbash Meinel Shave off another 5s by not building 'node_by_key'	1126	key = tuple([intern(element) for element in elements[:self._key_length]])
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1127	if first_key is None:
	1128	first_key = key
	1129	absent, references, value = elements[-3:]
	1130	ref_lists = []
	1131	for ref_string in references.split('\t'):
	1132	ref_lists.append(tuple([
	1133	int(ref) for ref in ref_string.split('\r') if ref
	1134	]))
	1135	ref_lists = tuple(ref_lists)
	1136	self._keys_by_offset[pos] = (key, absent, ref_lists, value)
	1137	pos += len(line) + 1 # +1 for the \n
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1138	if absent:
	1139	continue
	1140	if self.node_ref_lists:
	1141	node_value = (value, ref_lists)
	1142	else:
	1143	node_value = value
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1144	nodes.append((key, node_value))
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1145	# print "parsed ", key
2890.2.17 by Robert Collins Split _parse_segment out into a _parse_lines helper, reducing duplication with full index parsing.	1146	return first_key, key, nodes, trailers
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1147
	1148	def _parsed_bytes(self, start, start_key, end, end_key):
	1149	"""Mark the bytes from start to end as parsed.
	1150
	1151	Calling self._parsed_bytes(1,2) will mark one byte (the one at offset
	1152	1) as parsed.
	1153
	1154	:param start: The start of the parsed region.
	1155	:param end: The end of the parsed region.
	1156	"""
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1157	index = self._parsed_byte_index(start)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1158	new_value = (start, end)
	1159	new_key = (start_key, end_key)
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1160	if index == -1:
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1161	# first range parsed is always the beginning.
	1162	self._parsed_byte_map.insert(index, new_value)
	1163	self._parsed_key_map.insert(index, new_key)
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1164	return
	1165	# four cases:
	1166	# new region
	1167	# extend lower region
	1168	# extend higher region
	1169	# combine two regions
	1170	if (index + 1 < len(self._parsed_byte_map) and
	1171	self._parsed_byte_map[index][1] == start and
	1172	self._parsed_byte_map[index + 1][0] == end):
	1173	# combine two regions
	1174	self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
	1175	self._parsed_byte_map[index + 1][1])
	1176	self._parsed_key_map[index] = (self._parsed_key_map[index][0],
	1177	self._parsed_key_map[index + 1][1])
2890.2.12 by Robert Collins More index tweaks.	1178	del self._parsed_byte_map[index + 1]
2890.2.12 by Robert Collins More index tweaks.	1179	del self._parsed_key_map[index + 1]
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1180	elif self._parsed_byte_map[index][1] == start:
	1181	# extend the lower entry
	1182	self._parsed_byte_map[index] = (
	1183	self._parsed_byte_map[index][0], end)
	1184	self._parsed_key_map[index] = (
	1185	self._parsed_key_map[index][0], end_key)
	1186	elif (index + 1 < len(self._parsed_byte_map) and
	1187	self._parsed_byte_map[index + 1][0] == end):
	1188	# extend the higher entry
	1189	self._parsed_byte_map[index + 1] = (
	1190	start, self._parsed_byte_map[index + 1][1])
	1191	self._parsed_key_map[index + 1] = (
	1192	start_key, self._parsed_key_map[index + 1][1])
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1193	else:
2890.2.11 by Robert Collins Bisection improvements after integrating with packs.	1194	# new entry
	1195	self._parsed_byte_map.insert(index + 1, new_value)
	1196	self._parsed_key_map.insert(index + 1, new_key)
2890.2.5 by Robert Collins Create a content lookup function for bisection in GraphIndex.	1197
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1198	def _read_and_parse(self, readv_ranges):
4775.1.1 by Martin Pool Remove several 'the the' typos	1199	"""Read the ranges and parse the resulting data.
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1200
	1201	:param readv_ranges: A prepared readv range list.
	1202	"""
3665.3.5 by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index.	1203	if not readv_ranges:
	1204	return
	1205	if self._nodes is None and self._bytes_read * 2 >= self._size:
	1206	# We've already read more than 50% of the file and we are about to
	1207	# request more data, just _buffer_all() and be done
	1208	self._buffer_all()
	1209	return
	1210
5074.4.3 by John Arbash Meinel Actually implement offset support for GraphIndex.	1211	base_offset = self._base_offset
	1212	if base_offset != 0:
	1213	# Rewrite the ranges for the offset
	1214	readv_ranges = [(start+base_offset, size)
	1215	for start, size in readv_ranges]
3665.3.5 by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index.	1216	readv_data = self._transport.readv(self._name, readv_ranges, True,
5074.4.3 by John Arbash Meinel Actually implement offset support for GraphIndex.	1217	self._size + self._base_offset)
3665.3.5 by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index.	1218	# parse
	1219	for offset, data in readv_data:
5074.4.3 by John Arbash Meinel Actually implement offset support for GraphIndex.	1220	offset -= base_offset
3665.3.5 by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index.	1221	self._bytes_read += len(data)
5074.4.3 by John Arbash Meinel Actually implement offset support for GraphIndex.	1222	if offset < 0:
	1223	# transport.readv() expanded to extra data which isn't part of
	1224	# this index
	1225	data = data[-offset:]
	1226	offset = 0
3665.3.5 by John Arbash Meinel Move the point at which we 'buffer_all' if we've read >50% of the index.	1227	if offset == 0 and len(data) == self._size:
	1228	# We read the whole range, most likely because the
	1229	# Transport upcast our readv ranges into one long request
	1230	# for enough total data to grab the whole index.
	1231	self._buffer_all(StringIO(data))
	1232	return
	1233	if self._bisect_nodes is None:
	1234	# this must be the start
	1235	if not (offset == 0):
	1236	raise AssertionError()
	1237	offset, data = self._parse_header_from_bytes(data)
	1238	# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
	1239	self._parse_region(offset, data)
2890.2.6 by Robert Collins Add support for key references to the index lookup_keys_via_location bisection interface.	1240
def _signature(self):
    """The file signature for this index type.

    :return: The module level _SIGNATURE constant.
    """
    return _SIGNATURE
	1244
def validate(self):
    """Validate that everything in the index can be accessed.

    Any error raised while iterating over the entries propagates to the
    caller.
    """
    # iter_all_entries validates completely at the moment, so draining it
    # is all that is needed.
    for _entry in self.iter_all_entries():
        pass
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1250
	1251
	1252	class CombinedGraphIndex(object):
	1253	"""A GraphIndex made up from smaller GraphIndices.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1254
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1255	The backing indices must implement GraphIndex, and are presumed to be
	1256	static data.
2592.1.45 by Robert Collins Tweak documentation as per Aaron's review.	1257
	1258	Queries against the combined index will be made against the first index,
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1259	and then the second and so on. The order of indices can thus influence
2592.1.45 by Robert Collins Tweak documentation as per Aaron's review.	1260	performance significantly. For example, if one index is on local disk and a
	1261	second on a remote server, the local disk index should be before the other
	1262	in the index list.
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1263
	1264	Also, queries tend to need results from the same indices as previous
	1265	queries. So the indices will be reordered after every query to put the
	1266	indices that had the result(s) of that query first (while otherwise
	1267	preserving the relative ordering).
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1268	"""
	1269
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1270	def __init__(self, indices, reload_func=None):
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1271	"""Create a CombinedGraphIndex backed by indices.
	1272
2592.1.45 by Robert Collins Tweak documentation as per Aaron's review.	1273	:param indices: An ordered list of indices to query for data.
3789.1.3 by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count().	1274	:param reload_func: A function to call if we find we are missing an
	1275	index. Should have the form reload_func() => True/False to indicate
	1276	if reloading actually changed anything.
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1277	"""
	1278	self._indices = indices
3789.1.3 by John Arbash Meinel CombinedGraphIndex can now reload when calling key_count().	1279	self._reload_func = reload_func
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1280	# Sibling indices are other CombinedGraphIndex that we should call
	1281	# _move_to_front_by_name on when we auto-reorder ourself.
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1282	self._sibling_indices = []
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1283	# A list of names that corresponds to the instances in self._indices,
	1284	# so _index_names[0] is always the name for _indices[0], etc. Sibling
	1285	# indices must all use the same set of names as each other.
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1286	self._index_names = [None] * len(self._indices)
2592.1.37 by Robert Collins Add CombinedGraphIndex.insert_index.	1287
2592.5.4 by Martin Pool Add CombinedGraphIndex repr	1288	def __repr__(self):
	1289	return "%s(%s)" % (
	1290	self.__class__.__name__,
	1291	', '.join(map(repr, self._indices)))
	1292
def clear_cache(self):
    """See GraphIndex.clear_cache()

    The call is forwarded to every backing index, in order.
    """
    for backing_index in self._indices:
        backing_index.clear_cache()
	1297
def get_parent_map(self, keys):
    """See graph.StackedParentsProvider.get_parent_map

    :param keys: An iterable of the keys to look up.
    :return: A dict mapping each key that was found to its parents, which
        are the first reference list of the key's entry. A found key with
        no parents maps to (NULL_REVISION,), and NULL_REVISION itself, when
        requested, maps to an empty list.
    """
    search_keys = set(keys)
    null_revision = _mod_revision.NULL_REVISION
    found_parents = {}
    if null_revision in search_keys:
        # The null revision is answered directly rather than looked up.
        search_keys.discard(null_revision)
        found_parents[null_revision] = []
    for _index, key, _value, refs in self.iter_entries(search_keys):
        found_parents[key] = refs[0] or (null_revision,)
    return found_parents
2979.2.2 by Robert Collins Per-file graph heads detection during commit for pack repositories.	1312
# has_key is bound to the _has_key_from_parent_map helper, which is defined
# outside this part of the file (presumably implemented on top of
# get_parent_map - confirm against the helper).
has_key = _has_key_from_parent_map
3830.3.9 by Martin Pool Simplify kvf insert_record_stream; add has_key shorthand methods; update stacking effort tests	1314
def insert_index(self, pos, index, name=None):
    """Insert a new index in the list of indices to query.

    The name is inserted into the list of index names at the same
    position, keeping both lists aligned.

    :param pos: The position to insert the index.
    :param index: The index to insert.
    :param name: a name for this index, e.g. a pack name. These names can
        be used to reflect index reorderings to related CombinedGraphIndex
        instances that use the same names. (see set_sibling_indices)
    """
    backing, names = self._indices, self._index_names
    backing.insert(pos, index)
    names.insert(pos, name)
2592.1.37 by Robert Collins Add CombinedGraphIndex.insert_index.	1326
def iter_all_entries(self):
    """Iterate over all keys within the index

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    If a child index raises NoSuchFile, a reload is attempted through
    _reload_or_raise() and the iteration over the indices starts again;
    keys that were already yielded are not repeated.

    :return: An iterable of the nodes yielded by the child indices, whose
        second element is the key.
        There is no defined order for the result iteration - it will be in
        the most efficient order for the index.
    """
    reported = set()
    finished = False
    while not finished:
        try:
            for child in self._indices:
                for node in child.iter_all_entries():
                    key = node[1]
                    if key in reported:
                        continue
                    yield node
                    reported.add(key)
            finished = True
        except errors.NoSuchFile:
            self._reload_or_raise()
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1348
	1349	def iter_entries(self, keys):
	1350	"""Iterate over keys within the index.
	1351
2592.1.44 by Robert Collins Remove some unneeded index iteration by checking if we have found all keys, and grammar improvements from Aaron's review.	1352	Duplicate keys across child indices are presumed to have the same
	1353	value and are only reported once.
	1354
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1355	:param keys: An iterable providing the keys to be retrieved.
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1356	:return: An iterable of (index, key, reference_lists, value). There is
	1357	no defined order for the result iteration - it will be in the most
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1358	efficient order for the index.
	1359	"""
	1360	keys = set(keys)
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1361	hit_indices = []
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1362	while True:
	1363	try:
	1364	for index in self._indices:
	1365	if not keys:
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1366	break
	1367	index_hit = False
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1368	for node in index.iter_entries(keys):
	1369	keys.remove(node[1])
	1370	yield node
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1371	index_hit = True
	1372	if index_hit:
	1373	hit_indices.append(index)
	1374	break
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1375	except errors.NoSuchFile:
	1376	self._reload_or_raise()
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1377	self._move_to_front(hit_indices)
2592.1.31 by Robert Collins Build a combined graph index to use multiple indices at once.	1378
def iter_entries_prefix(self, keys):
    """Iterate over keys within the index using prefix matching.

    Duplicate keys across child indices are presumed to have the same
    value and are only reported once.

    Prefix matching is applied within the tuple of a key, not to within
    the bytestring of each key element. e.g. if you have the keys ('foo',
    'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
    only the former key is returned.

    Every child index is queried with all of the prefixes. Once the
    iteration is exhausted, the indices that produced new results are
    passed to _move_to_front(). If a child index raises NoSuchFile, a
    reload is attempted through _reload_or_raise() and the indices are
    queried again; keys that were already yielded are not repeated.

    :param keys: An iterable providing the key prefixes to be retrieved.
        Each key prefix takes the form of a tuple the length of a key, but
        with the last N elements 'None' rather than a regular bytestring.
        The first element cannot be 'None'.
    :return: An iterable as per iter_all_entries, but restricted to the
        keys with a matching prefix to those supplied. No additional keys
        will be returned, and every match that is in the index will be
        returned.
    """
    prefixes = set(keys)
    if not prefixes:
        return
    reported = set()
    hit_indices = []
    finished = False
    while not finished:
        try:
            for child in self._indices:
                child_hit = False
                for node in child.iter_entries_prefix(prefixes):
                    key = node[1]
                    if key not in reported:
                        reported.add(key)
                        yield node
                        child_hit = True
                if child_hit:
                    hit_indices.append(child)
            finished = True
        except errors.NoSuchFile:
            self._reload_or_raise()
    self._move_to_front(hit_indices)
	1420
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1421	def _move_to_front(self, hit_indices):
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1422	"""Rearrange self._indices so that hit_indices are first.
	1423
	1424	Order is maintained as much as possible, e.g. the first unhit index
	1425	will be the first index in _indices after the hit_indices, and the
	1426	hit_indices will be present in exactly the order they are passed to
	1427	_move_to_front.
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1428
	1429	_move_to_front propagates to all objects in self._sibling_indices by
	1430	calling _move_to_front_by_name.
5086.7.1 by Andrew Bennetts Add CombinedGraphIndex(auto_reorder=True) feature, which seems to help incremental fetchs from 2a over HTTP.	1431	"""
5151.2.1 by John Arbash Meinel Avoid reordering when unnecessary. Fixes bug #562429	1432	if self._indices[:len(hit_indices)] == hit_indices:
	1433	# The 'hit_indices' are already at the front (and in the same
	1434	# order), no need to re-order
	1435	return
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1436	hit_names = self._move_to_front_by_index(hit_indices)
	1437	for sibling_idx in self._sibling_indices:
	1438	sibling_idx._move_to_front_by_name(hit_names)
	1439
	1440	def _move_to_front_by_index(self, hit_indices):
	1441	"""Core logic for _move_to_front.
	1442
	1443	Returns a list of names corresponding to the hit_indices param.
	1444	"""
5151.2.3 by John Arbash Meinel Restore the indices_info variable.	1445	indices_info = zip(self._index_names, self._indices)
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1446	if 'index' in debug.debug_flags:
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	1447	trace.mutter('CombinedGraphIndex reordering: currently %r, '
	1448	'promoting %r', indices_info, hit_indices)
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1449	hit_names = []
5151.2.2 by John Arbash Meinel Avoid packing and unpacking the indices, and shortcut once you've found all	1450	unhit_names = []
	1451	new_hit_indices = []
	1452	unhit_indices = []
	1453
5151.2.3 by John Arbash Meinel Restore the indices_info variable.	1454	for offset, (name, idx) in enumerate(indices_info):
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1455	if idx in hit_indices:
5151.2.4 by John Arbash Meinel Minor tweak	1456	hit_names.append(name)
5151.2.2 by John Arbash Meinel Avoid packing and unpacking the indices, and shortcut once you've found all	1457	new_hit_indices.append(idx)
	1458	if len(new_hit_indices) == len(hit_indices):
	1459	# We've found all of the hit entries, everything else is
	1460	# unhit
	1461	unhit_names.extend(self._index_names[offset+1:])
	1462	unhit_indices.extend(self._indices[offset+1:])
	1463	break
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1464	else:
5151.2.2 by John Arbash Meinel Avoid packing and unpacking the indices, and shortcut once you've found all	1465	unhit_names.append(name)
	1466	unhit_indices.append(idx)
	1467
	1468	self._indices = new_hit_indices + unhit_indices
	1469	self._index_names = hit_names + unhit_names
5086.7.4 by Andrew Bennetts Remove auto_reorder param, just do it unconditionally. Add some -Dindex mutters.	1470	if 'index' in debug.debug_flags:
5753.2.2 by Jelmer Vernooij Remove some unnecessary imports, clean up lazy imports.	1471	trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1472	return hit_names
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1473
	1474	def _move_to_front_by_name(self, hit_names):
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1475	"""Moves indices named by 'hit_names' to front of the search order, as
	1476	described in _move_to_front.
	1477	"""
	1478	# Translate names to index instances, and then call
	1479	# _move_to_front_by_index.
5086.7.2 by Andrew Bennetts Share ordering hints between CombinedIndex objects of one RepositoryPackCollection. Greatly improves small fetches from repos with many pack files.	1480	indices_info = zip(self._index_names, self._indices)
	1481	hit_indices = []
	1482	for name, idx in indices_info:
	1483	if name in hit_names:
	1484	hit_indices.append(idx)
5086.7.3 by Andrew Bennetts Improve docstrings and refactor slightly for clarity.	1485	self._move_to_front_by_index(hit_indices)
2624.2.9 by Robert Collins Introduce multiple component keys, which is what is needed to combine multiple knit indices into one.	1486
def find_ancestry(self, keys, ref_list_num):
    """Find the complete ancestry for the given set of keys.

    Note that this is a whole-ancestry request, so it should be used
    sparingly.

    Each pass walks the child indices in order. An index is asked
    (via its _find_ancestors) to resolve as much as it can; whatever it
    reports missing is handed to the next index. Keys that every index
    visited in a pass reported missing are recorded as missing and are
    not searched again.

    :param keys: An iterable of keys to look for
    :param ref_list_num: The reference list which references the parents
        we care about.
    :return: (parent_map, missing_keys)
    """
    # XXX: make this call _move_to_front?
    missing_keys = set()
    parent_map = {}
    keys_to_lookup = set(keys)
    while keys_to_lookup:
        # keys that all indexes claim are missing, stop searching them
        all_index_missing = None
        for index in self._indices:
            # TODO: we should probably be doing something with
            #       'missing_keys' since we've already determined that
            #       those revisions have not been found anywhere
            index_missing_keys = set()
            # Find all of the ancestry we can from this index; keep
            # looking until the search_keys set is empty, which means
            # things we didn't find should be in index_missing_keys
            search_keys = keys_to_lookup
            while search_keys:
                search_keys = index._find_ancestors(
                    search_keys, ref_list_num, parent_map,
                    index_missing_keys)
            # Now set whatever was missing to be searched in the next index
            keys_to_lookup = index_missing_keys
            if all_index_missing is None:
                all_index_missing = set(index_missing_keys)
            else:
                all_index_missing.intersection_update(index_missing_keys)
            if not keys_to_lookup:
                break
        if all_index_missing is None:
            # There were no indexes, so all search keys are 'missing'
            missing_keys.update(keys_to_lookup)
            keys_to_lookup = None
        else:
            missing_keys.update(all_index_missing)
            keys_to_lookup.difference_update(all_index_missing)
    return parent_map, missing_keys
	1550
def key_count(self):
    """Return an estimate of the number of keys in this index.

    For CombinedGraphIndex this is approximated by the sum of the keys of
    the child indices. As child indices may have duplicate keys this can
    have a maximum error of the number of child indices * largest number of
    keys in any index.
    """
    while True:
        try:
            total = 0
            for child in self._indices:
                total += child.key_count()
            return total
        except errors.NoSuchFile:
            # A child index went away; reload and start counting afresh.
            self._reload_or_raise()
	1564
# Bind the shared helper under the name missing_keys rather than
# re-implementing it here (helper presumably defined at module level
# earlier in this file -- confirm).
missing_keys = _missing_keys_from_parent_map
	1566
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1567	def _reload_or_raise(self):
	1568	"""We just got a NoSuchFile exception.
	1569
	1570	Try to reload the indices, if it fails, just raise the current
	1571	exception.
	1572	"""
	1573	if self._reload_func is None:
	1574	raise
	1575	exc_type, exc_value, exc_traceback = sys.exc_info()
3789.1.10 by John Arbash Meinel Review comments from Martin.	1576	trace.mutter('Trying to reload after getting exception: %s',
	1577	exc_value)
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1578	if not self._reload_func():
	1579	# We tried to reload, but nothing changed, so we fail anyway
3789.1.10 by John Arbash Meinel Review comments from Martin.	1580	trace.mutter('_reload_func indicated nothing has changed.'
	1581	' Raising original exception.')
3789.1.4 by John Arbash Meinel CombinedGraphIndex.iter_entries() is now able to reload on request.	1582	raise exc_type, exc_value, exc_traceback
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1583
def set_sibling_indices(self, sibling_combined_graph_indices):
    """Set the CombinedGraphIndex objects to reorder after reordering self.

    :param sibling_combined_graph_indices: The objects to store as
        self._sibling_indices.
    """
    siblings = sibling_combined_graph_indices
    self._sibling_indices = siblings
	1588
def validate(self):
    """Validate that everything in the index can be accessed.

    Calls validate() on each child index in turn. If a child raises
    NoSuchFile, a reload is attempted and validation restarts from the
    first child.
    """
    validated = False
    while not validated:
        try:
            for child in self._indices:
                child.validate()
            validated = True
        except errors.NoSuchFile:
            self._reload_or_raise()
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1598
	1599
	1600	class InMemoryGraphIndex(GraphIndexBuilder):
	1601	"""A GraphIndex which operates entirely out of memory and is mutable.
	1602
	1603	This is designed to allow the accumulation of GraphIndex entries during a
	1604	single write operation, where the accumulated entries need to be immediately
	1605	available - for example via a CombinedGraphIndex.
	1606	"""
	1607
	1608	def add_nodes(self, nodes):
	1609	"""Add nodes to the index.
	1610
	1611	:param nodes: An iterable of (key, node_refs, value) entries to add.
	1612	"""
2592.3.39 by Robert Collins Fugly version to remove signatures.kndx	1613	if self.reference_lists:
	1614	for (key, value, node_refs) in nodes:
	1615	self.add_node(key, value, node_refs)
	1616	else:
	1617	for (key, value) in nodes:
	1618	self.add_node(key, value)
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1619
	1620	def iter_all_entries(self):
	1621	"""Iterate over all keys within the index
	1622
2592.5.1 by Martin Pool Fix docstrings for Index.iter_entries etc	1623	:return: An iterable of (index, key, reference_lists, value). There is no
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1624	defined order for the result iteration - it will be in the most
	1625	efficient order for the index (in this case dictionary hash order).
	1626	"""
2745.1.1 by Robert Collins Add a number of -Devil checkpoints.	1627	if 'evil' in debug.debug_flags:
2592.3.112 by Robert Collins Various fixups found dogfooding.	1628	trace.mutter_callsite(3,
2745.1.2 by Robert Collins Ensure mutter_callsite is not directly called on a lazy_load object, to make the stacklevel parameter work correctly.	1629	"iter_all_entries scales with size of history.")
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1630	if self.reference_lists:
	1631	for key, (absent, references, value) in self._nodes.iteritems():
	1632	if not absent:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1633	yield self, key, value, references
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1634	else:
	1635	for key, (absent, references, value) in self._nodes.iteritems():
	1636	if not absent:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1637	yield self, key, value
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1638
	1639	def iter_entries(self, keys):
	1640	"""Iterate over keys within the index.
	1641
	1642	:param keys: An iterable providing the keys to be retrieved.
2979.2.4 by Robert Collins Docstring fixes from review.	1643	:return: An iterable of (index, key, value, reference_lists). There is no
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1644	defined order for the result iteration - it will be in the most
	1645	efficient order for the index (keys iteration order in this case).
	1646	"""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1647	# Note: See BTreeBuilder.iter_entries for an explanation of why we
	1648	# aren't using set().intersection() here
	1649	nodes = self._nodes
	1650	keys = [key for key in keys if key in nodes]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1651	if self.reference_lists:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1652	for key in keys:
	1653	node = nodes[key]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1654	if not node[0]:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1655	yield self, key, node[2], node[1]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1656	else:
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1657	for key in keys:
	1658	node = nodes[key]
2592.1.46 by Robert Collins Make GraphIndex accept nodes as key, value, references, so that the method	1659	if not node[0]:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1660	yield self, key, node[2]
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1661
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1662	def iter_entries_prefix(self, keys):
	1663	"""Iterate over keys within the index using prefix matching.
	1664
	1665	Prefix matching is applied within the tuple of a key, not to within
	1666	the bytestring of each key element. e.g. if you have the keys ('foo',
	1667	'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
	1668	only the former key is returned.
	1669
	1670	:param keys: An iterable providing the key prefixes to be retrieved.
	1671	Each key prefix takes the form of a tuple the length of a key, but
	1672	with the last N elements 'None' rather than a regular bytestring.
	1673	The first element cannot be 'None'.
	1674	:return: An iterable as per iter_all_entries, but restricted to the
	1675	keys with a matching prefix to those supplied. No additional keys
	1676	will be returned, and every match that is in the index will be
	1677	returned.
	1678	"""
	1679	# XXX: To much duplication with the GraphIndex class; consider finding
	1680	# a good place to pull out the actual common logic.
	1681	keys = set(keys)
	1682	if not keys:
	1683	return
	1684	if self._key_length == 1:
	1685	for key in keys:
	1686	# sanity check
	1687	if key[0] is None:
	1688	raise errors.BadIndexKey(key)
	1689	if len(key) != self._key_length:
	1690	raise errors.BadIndexKey(key)
	1691	node = self._nodes[key]
	1692	if node[0]:
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1693	continue
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1694	if self.reference_lists:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1695	yield self, key, node[2], node[1]
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1696	else:
2624.2.17 by Robert Collins Review feedback.	1697	yield self, key, node[2]
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1698	return
3644.2.4 by John Arbash Meinel Change GraphIndex to also have a _get_nodes_by_key	1699	nodes_by_key = self._get_nodes_by_key()
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1700	for key in keys:
	1701	# sanity check
	1702	if key[0] is None:
	1703	raise errors.BadIndexKey(key)
	1704	if len(key) != self._key_length:
	1705	raise errors.BadIndexKey(key)
	1706	# find what it refers to:
3644.2.4 by John Arbash Meinel Change GraphIndex to also have a _get_nodes_by_key	1707	key_dict = nodes_by_key
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1708	elements = list(key)
	1709	# find the subdict to return
	1710	try:
	1711	while len(elements) and elements[0] is not None:
	1712	key_dict = key_dict[elements[0]]
	1713	elements.pop(0)
	1714	except KeyError:
	1715	# a non-existant lookup.
	1716	continue
	1717	if len(elements):
	1718	dicts = [key_dict]
	1719	while dicts:
	1720	key_dict = dicts.pop(-1)
	1721	# can't be empty or would not exist
	1722	item, value = key_dict.iteritems().next()
	1723	if type(value) == dict:
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1724	# push keys
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1725	dicts.extend(key_dict.itervalues())
	1726	else:
	1727	# yield keys
	1728	for value in key_dict.itervalues():
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1729	yield (self, ) + value
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1730	else:
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1731	yield (self, ) + key_dict
2624.2.10 by Robert Collins Also add iter_key_prefix support to InMemoryGraphIndex.	1732
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1733	def key_count(self):
	1734	"""Return an estimate of the number of keys in this index.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1735
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1736	For InMemoryGraphIndex the estimate is exact.
	1737	"""
4789.28.2 by John Arbash Meinel Get rid of the GraphIndexBuilder/BTreeBuilder._keys attribute.	1738	return len(self._nodes) - len(self._absent_keys)
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1739
2592.1.38 by Robert Collins Create an InMemoryGraphIndex for temporary indexing.	1740	def validate(self):
	1741	"""In memory index's have no known corruption at the moment."""
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1742
	1743
	1744	class GraphIndexPrefixAdapter(object):
	1745	"""An adapter between GraphIndex with different key lengths.
	1746
	1747	Queries against this will emit queries against the adapted Graph with the
	1748	prefix added, queries for all items use iter_entries_prefix. The returned
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1749	nodes will have their keys and node references adjusted to remove the
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1750	prefix. Finally, an add_nodes_callback can be supplied - when called the
	1751	nodes and references being added will have prefix prepended.
	1752	"""
	1753
2624.2.17 by Robert Collins Review feedback.	1754	def __init__(self, adapted, prefix, missing_key_length,
2624.2.17 by Robert Collins Review feedback.	1755	add_nodes_callback=None):
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1756	"""Construct an adapter against adapted with prefix."""
	1757	self.adapted = adapted
2624.2.19 by Robert Collins Why we should always test before committing.	1758	self.prefix_key = prefix + (None,)*missing_key_length
2624.2.17 by Robert Collins Review feedback.	1759	self.prefix = prefix
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1760	self.prefix_len = len(prefix)
	1761	self.add_nodes_callback = add_nodes_callback
	1762
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1763	def add_nodes(self, nodes):
	1764	"""Add nodes to the index.
	1765
	1766	:param nodes: An iterable of (key, node_refs, value) entries to add.
	1767	"""
	1768	# save nodes in case its an iterator
	1769	nodes = tuple(nodes)
	1770	translated_nodes = []
	1771	try:
2624.2.17 by Robert Collins Review feedback.	1772	# Add prefix_key to each reference node_refs is a tuple of tuples,
2624.2.17 by Robert Collins Review feedback.	1773	# so split it apart, and add prefix_key to the internal reference
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1774	for (key, value, node_refs) in nodes:
	1775	adjusted_references = (
2624.2.17 by Robert Collins Review feedback.	1776	tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1777	for ref_list in node_refs))
2624.2.17 by Robert Collins Review feedback.	1778	translated_nodes.append((self.prefix + key, value,
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1779	adjusted_references))
	1780	except ValueError:
	1781	# XXX: TODO add an explicit interface for getting the reference list
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1782	# status, to handle this bit of user-friendliness in the API more
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1783	# explicitly.
	1784	for (key, value) in nodes:
2624.2.17 by Robert Collins Review feedback.	1785	translated_nodes.append((self.prefix + key, value))
2624.2.13 by Robert Collins Implement add_node/add_nodes to the GraphIndexPrefixAdapter.	1786	self.add_nodes_callback(translated_nodes)
	1787
	1788	def add_node(self, key, value, references=()):
	1789	"""Add a node to the index.
	1790
	1791	:param key: The key. keys are non-empty tuples containing
	1792	as many whitespace-free utf8 bytestrings as the key length
	1793	defined for this index.
	1794	:param references: An iterable of iterables of keys. Each is a
	1795	reference to another key.
	1796	:param value: The value to associate with the key. It may be any
	1797	bytes as long as it does not contain \0 or \n.
	1798	"""
	1799	self.add_nodes(((key, value, references), ))
	1800
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1801	def _strip_prefix(self, an_iter):
	1802	"""Strip prefix data from nodes and return it."""
	1803	for node in an_iter:
	1804	# cross checks
2624.2.17 by Robert Collins Review feedback.	1805	if node[1][:self.prefix_len] != self.prefix:
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1806	raise errors.BadIndexData(self)
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1807	for ref_list in node[3]:
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1808	for ref_node in ref_list:
2624.2.17 by Robert Collins Review feedback.	1809	if ref_node[:self.prefix_len] != self.prefix:
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1810	raise errors.BadIndexData(self)
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1811	yield node[0], node[1][self.prefix_len:], node[2], (
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1812	tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
2624.2.14 by Robert Collins Add source index to the index iteration API to allow mapping back to the origin of retrieved data.	1813	for ref_list in node[3]))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1814
	1815	def iter_all_entries(self):
	1816	"""Iterate over all keys within the index
	1817
	1818	iter_all_entries is implemented against the adapted index using
	1819	iter_entries_prefix.
	1820
2592.5.1 by Martin Pool Fix docstrings for Index.iter_entries etc	1821	:return: An iterable of (index, key, reference_lists, value). There is no
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1822	defined order for the result iteration - it will be in the most
	1823	efficient order for the index (in this case dictionary hash order).
	1824	"""
2624.2.19 by Robert Collins Why we should always test before committing.	1825	return self._strip_prefix(self.adapted.iter_entries_prefix([self.prefix_key]))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1826
	1827	def iter_entries(self, keys):
	1828	"""Iterate over keys within the index.
	1829
	1830	:param keys: An iterable providing the keys to be retrieved.
2979.2.4 by Robert Collins Docstring fixes from review.	1831	:return: An iterable of (index, key, value, reference_lists). There is no
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1832	defined order for the result iteration - it will be in the most
	1833	efficient order for the index (keys iteration order in this case).
	1834	"""
	1835	return self._strip_prefix(self.adapted.iter_entries(
2624.2.17 by Robert Collins Review feedback.	1836	self.prefix + key for key in keys))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1837
	1838	def iter_entries_prefix(self, keys):
	1839	"""Iterate over keys within the index using prefix matching.
	1840
	1841	Prefix matching is applied within the tuple of a key, not to within
	1842	the bytestring of each key element. e.g. if you have the keys ('foo',
	1843	'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
	1844	only the former key is returned.
	1845
	1846	:param keys: An iterable providing the key prefixes to be retrieved.
	1847	Each key prefix takes the form of a tuple the length of a key, but
	1848	with the last N elements 'None' rather than a regular bytestring.
	1849	The first element cannot be 'None'.
	1850	:return: An iterable as per iter_all_entries, but restricted to the
	1851	keys with a matching prefix to those supplied. No additional keys
	1852	will be returned, and every match that is in the index will be
	1853	returned.
	1854	"""
	1855	return self._strip_prefix(self.adapted.iter_entries_prefix(
2624.2.17 by Robert Collins Review feedback.	1856	self.prefix + key for key in keys))
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1857
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1858	def key_count(self):
	1859	"""Return an estimate of the number of keys in this index.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	1860
2624.2.16 by Robert Collins Add a key_count method to GraphIndex and friends, allowing optimisation of length calculations by the index.	1861	For GraphIndexPrefixAdapter this is relatively expensive - key
	1862	iteration with the prefix is done.
	1863	"""
	1864	return len(list(self.iter_all_entries()))
	1865
2624.2.12 by Robert Collins Create an adapter between indices with differing key lengths.	1866	def validate(self):
	1867	"""Call the adapted's validate."""
	1868	self.adapted.validate()