~bzr-pqm/bzr/bzr.dev : contents of bzrlib/_btree_serializer

~bzr-pqm/bzr/bzr.dev : (revision 6228.3.4)

4763.2.4 by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry.	1	# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.29 by John Arbash Meinel Cleanup the copyright headers	16	#
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	17
	18	"""Pyrex extensions to btree node parsing."""
	19
3731.1.1 by Robert Collins * The C extensions now build on python 2.4 (Robert Collins, #271939)	20	#python2.4 support
	21	cdef extern from "python-compat.h":
	22	pass
	23
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	24	cdef extern from "stdlib.h":
	25	ctypedef unsigned size_t
	26
	27	cdef extern from "Python.h":
3641.3.32 by John Arbash Meinel PQM's pyrex version requires Py_ssize_t to be manually defined	28	ctypedef int Py_ssize_t # Required for older pyrex versions
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	29	ctypedef struct PyObject:
	30	pass
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	31	int PyList_Append(object lst, object item) except -1
	32
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	33	char *PyString_AsString(object p) except NULL
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	34	object PyString_FromStringAndSize(char *, Py_ssize_t)
4075.3.1 by John Arbash Meinel Use PyString_InternInPlace to intern() the various parts of keys that are processed.	35	PyObject PyString_FromStringAndSize_ptr "PyString_FromStringAndSize" (char , Py_ssize_t)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	36	object PyString_FromFormat(char *, ...)
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	37	int PyString_CheckExact(object s)
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	38	int PyString_CheckExact_ptr "PyString_CheckExact" (PyObject *)
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	39	Py_ssize_t PyString_Size(object p)
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	40	Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *)
	41	char * PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *)
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	42	char * PyString_AS_STRING(object)
	43	Py_ssize_t PyString_GET_SIZE(object)
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	44	int PyString_AsStringAndSize_ptr(PyObject , char buf, Py_ssize_t len)
4075.3.1 by John Arbash Meinel Use PyString_InternInPlace to intern() the various parts of keys that are processed.	45	void PyString_InternInPlace(PyObject **)
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	46	int PyTuple_CheckExact(object t)
4672.2.1 by John Arbash Meinel Speed up the innermost btree parsing function.	47	object PyTuple_New(Py_ssize_t n_entries)
	48	void PyTuple_SET_ITEM(object, Py_ssize_t offset, object) # steals the ref
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	49	Py_ssize_t PyTuple_GET_SIZE(object t)
	50	PyObject *PyTuple_GET_ITEM_ptr_object "PyTuple_GET_ITEM" (object tpl, int index)
4672.2.1 by John Arbash Meinel Speed up the innermost btree parsing function.	51	void Py_INCREF(object)
4075.3.1 by John Arbash Meinel Use PyString_InternInPlace to intern() the various parts of keys that are processed.	52	void Py_DECREF_ptr "Py_DECREF" (PyObject *)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	53	void *PyMem_Malloc(size_t nbytes)
	54	void PyMem_Free(void *)
	55	void memset(void *, int, size_t)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	56
	57	cdef extern from "string.h":
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	58	void memcpy(void dest, void *src, size_t n)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	59	void memchr(void s, int c, size_t n)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	60	int memcmp(void s1, void s2, size_t n)
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	61	# GNU extension
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	62	# void memrchr(void s, int c, size_t n)
	63	int strncmp(char s1, char s2, size_t n)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	64	unsigned long strtoul(char s1, char *out, int base)
5365.5.16 by John Arbash Meinel Trim the class a little bit.	65	long long strtoll(char s1, char *out, int base)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	66
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	67
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	68	# It seems we need to import the definitions so that the pyrex compiler has
	69	# local names to access them.
	70	from _static_tuple_c cimport StaticTuple, \
	71	import_static_tuple_c, StaticTuple_New, \
5365.5.11 by John Arbash Meinel get into the nitty gritty for the _key_to_sha1 function.	72	StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact, \
	73	StaticTuple_GET_SIZE, StaticTuple_GET_ITEM
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	74	# This tells the test infrastructure that StaticTuple is a class, so we don't
	75	# have to worry about exception checking.
	76	## extern cdef class StaticTuple
5365.5.28 by John Arbash Meinel Lots of compatibility changes for python2.4 and mingw32 compilers.	77	import sys
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	78
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	79
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	80	# TODO: Find some way to import this from _dirstate_helpers
4634.117.10 by John Arbash Meinel Change 'no except' to 'cannot_raise'	81	cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	82	# memrchr seems to be a GNU extension, so we have to implement it ourselves
	83	# It is not present in any win32 standard library
	84	cdef char *pos
	85	cdef char *start
	86
	87	start = <char*>s
	88	pos = start + n - 1
	89	while pos >= start:
	90	if pos[0] == c:
	91	return <void*>pos
	92	pos = pos - 1
	93	return NULL
	94
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	95
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	96	# TODO: Import this from _dirstate_helpers when it is merged
	97	cdef object safe_string_from_size(char *s, Py_ssize_t size):
	98	if size < 0:
	99	raise AssertionError(
	100	'tried to create a string with an invalid size: %d @0x%x'
	101	% (size, <int>s))
	102	return PyString_FromStringAndSize(s, size)
	103
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	104
4075.3.1 by John Arbash Meinel Use PyString_InternInPlace to intern() the various parts of keys that are processed.	105	cdef object safe_interned_string_from_size(char *s, Py_ssize_t size):
	106	cdef PyObject *py_str
	107	if size < 0:
	108	raise AssertionError(
	109	'tried to create a string with an invalid size: %d @0x%x'
	110	% (size, <int>s))
	111	py_str = PyString_FromStringAndSize_ptr(s, size)
	112	PyString_InternInPlace(&py_str)
	113	result = <object>py_str
	114	# Casting a PyObject* to an <object> triggers an INCREF from Pyrex, so we
	115	# DECREF it to avoid geting immortal strings
	116	Py_DECREF_ptr(py_str)
	117	return result
	118
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	119	# This sets up the StaticTuple C_API functionality
	120	import_static_tuple_c()
	121
4075.3.1 by John Arbash Meinel Use PyString_InternInPlace to intern() the various parts of keys that are processed.	122
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	123	cdef class BTreeLeafParser:
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	124	"""Parse the leaf nodes of a BTree index.
	125
	126	:ivar bytes: The PyString object containing the uncompressed text for the
	127	node.
	128	:ivar key_length: An integer describing how many pieces the keys have for
	129	this index.
	130	:ivar ref_list_length: An integer describing how many references this index
	131	contains.
	132	:ivar keys: A PyList of keys found in this node.
	133
	134	:ivar _cur_str: A pointer to the start of the next line to parse
	135	:ivar _end_str: A pointer to the end of bytes
	136	:ivar _start: Pointer to the location within the current line while
	137	parsing.
	138	:ivar _header_found: True when we have parsed the header for this node
	139	"""
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	140
	141	cdef object bytes
	142	cdef int key_length
	143	cdef int ref_list_length
	144	cdef object keys
	145
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	146	cdef char * _cur_str
	147	cdef char * _end_str
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	148	# The current start point for parsing
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	149	cdef char * _start
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	150
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	151	cdef int _header_found
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	152
	153	def __init__(self, bytes, key_length, ref_list_length):
	154	self.bytes = bytes
	155	self.key_length = key_length
	156	self.ref_list_length = ref_list_length
	157	self.keys = []
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	158	self._cur_str = NULL
	159	self._end_str = NULL
	160	self._header_found = 0
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	161	# keys are tuples
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	162
	163	cdef extract_key(self, char * last):
	164	"""Extract a key.
	165
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	166	:param last: points at the byte after the last byte permitted for the
	167	key.
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	168	"""
	169	cdef char *temp_ptr
	170	cdef int loop_counter
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	171	cdef StaticTuple key
	172
	173	key = StaticTuple_New(self.key_length)
4672.2.2 by John Arbash Meinel Switch to using a for loop, and add only when checking last rather than	174	for loop_counter from 0 <= loop_counter < self.key_length:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	175	# grab a key segment
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	176	temp_ptr = <char*>memchr(self._start, c'\0', last - self._start)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	177	if temp_ptr == NULL:
4672.2.2 by John Arbash Meinel Switch to using a for loop, and add only when checking last rather than	178	if loop_counter + 1 == self.key_length:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	179	# capture to last
	180	temp_ptr = last
	181	else:
	182	# Invalid line
	183	failure_string = ("invalid key, wanted segment from " +
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	184	repr(safe_string_from_size(self._start,
	185	last - self._start)))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	186	raise AssertionError(failure_string)
	187	# capture the key string
4679.8.1 by John Arbash Meinel Merge in the 2.1-static-tuple-btree branch, and restore the string intern tweaks.	188	if (self.key_length == 1
	189	and (temp_ptr - self._start) == 45
	190	and strncmp(self._start, 'sha1:', 5) == 0):
	191	key_element = safe_string_from_size(self._start,
	192	temp_ptr - self._start)
	193	else:
	194	key_element = safe_interned_string_from_size(self._start,
4075.3.1 by John Arbash Meinel Use PyString_InternInPlace to intern() the various parts of keys that are processed.	195	temp_ptr - self._start)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	196	# advance our pointer
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	197	self._start = temp_ptr + 1
4672.2.1 by John Arbash Meinel Speed up the innermost btree parsing function.	198	Py_INCREF(key_element)
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	199	StaticTuple_SET_ITEM(key, loop_counter, key_element)
	200	key = StaticTuple_Intern(key)
4679.3.48 by John Arbash Meinel Prototype of not interning sha1: strings.	201	return key
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	202
	203	cdef int process_line(self) except -1:
	204	"""Process a line in the bytes."""
	205	cdef char *last
	206	cdef char *temp_ptr
	207	cdef char *ref_ptr
	208	cdef char *next_start
	209	cdef int loop_counter
4679.8.1 by John Arbash Meinel Merge in the 2.1-static-tuple-btree branch, and restore the string intern tweaks.	210	cdef Py_ssize_t str_len
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	211
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	212	self._start = self._cur_str
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	213	# Find the next newline
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	214	last = <char*>memchr(self._start, c'\n', self._end_str - self._start)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	215	if last == NULL:
	216	# Process until the end of the file
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	217	last = self._end_str
	218	self._cur_str = self._end_str
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	219	else:
	220	# And the next string is right after it
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	221	self._cur_str = last + 1
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	222	# The last character is right before the '\n'
	223
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	224	if last == self._start:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	225	# parsed it all.
	226	return 0
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	227	if last < self._start:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	228	# Unexpected error condition - fail
4634.60.1 by John Arbash Meinel Don't return -1 directly, instead raise an exception.	229	raise AssertionError("last < self._start")
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	230	if 0 == self._header_found:
	231	# The first line in a leaf node is the header "type=leaf\n"
	232	if strncmp("type=leaf", self._start, last - self._start) == 0:
	233	self._header_found = 1
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	234	return 0
	235	else:
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	236	raise AssertionError('Node did not start with "type=leaf": %r'
	237	% (safe_string_from_size(self._start, last - self._start)))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	238
	239	key = self.extract_key(last)
	240	# find the value area
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	241	temp_ptr = <char*>_my_memrchr(self._start, c'\0', last - self._start)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	242	if temp_ptr == NULL:
	243	# Invalid line
4634.60.1 by John Arbash Meinel Don't return -1 directly, instead raise an exception.	244	raise AssertionError("Failed to find the value area")
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	245	else:
4679.8.1 by John Arbash Meinel Merge in the 2.1-static-tuple-btree branch, and restore the string intern tweaks.	246	# Because of how conversions were done, we ended up with lots of
	247	# values that are identical. These are all of the 0-length nodes
	248	# that are referred to by the TREE_ROOT (and likely some other
	249	# directory nodes.) For example, bzr has 25k references to
	250	# something like '12607215 328306 0 0', which ends up consuming 1MB
	251	# of memory, just for those strings.
	252	str_len = last - temp_ptr - 1
	253	if (str_len > 4
	254	and strncmp(" 0 0", last - 4, 4) == 0):
	255	# This drops peak mem for bzr.dev from 87.4MB => 86.2MB
	256	# For Launchpad 236MB => 232MB
	257	value = safe_interned_string_from_size(temp_ptr + 1, str_len)
	258	else:
	259	value = safe_string_from_size(temp_ptr + 1, str_len)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	260	# shrink the references end point
	261	last = temp_ptr
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	262
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	263	if self.ref_list_length:
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	264	ref_lists = StaticTuple_New(self.ref_list_length)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	265	loop_counter = 0
	266	while loop_counter < self.ref_list_length:
	267	ref_list = []
	268	# extract a reference list
	269	loop_counter = loop_counter + 1
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	270	if last < self._start:
4634.60.1 by John Arbash Meinel Don't return -1 directly, instead raise an exception.	271	raise AssertionError("last < self._start")
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	272	# find the next reference list end point:
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	273	temp_ptr = <char*>memchr(self._start, c'\t', last - self._start)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	274	if temp_ptr == NULL:
	275	# Only valid for the last list
	276	if loop_counter != self.ref_list_length:
	277	# Invalid line
4634.60.1 by John Arbash Meinel Don't return -1 directly, instead raise an exception.	278	raise AssertionError(
	279	"invalid key, loop_counter != self.ref_list_length")
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	280	else:
	281	# scan to the end of the ref list area
	282	ref_ptr = last
	283	next_start = last
	284	else:
	285	# scan to the end of this ref list
	286	ref_ptr = temp_ptr
	287	next_start = temp_ptr + 1
	288	# Now, there may be multiple keys in the ref list.
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	289	while self._start < ref_ptr:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	290	# loop finding keys and extracting them
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	291	temp_ptr = <char*>memchr(self._start, c'\r',
	292	ref_ptr - self._start)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	293	if temp_ptr == NULL:
	294	# key runs to the end
	295	temp_ptr = ref_ptr
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	296
4679.3.28 by John Arbash Meinel Add a KEY_HAS_HASH define.	297	PyList_Append(ref_list, self.extract_key(temp_ptr))
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	298	ref_list = StaticTuple_Intern(StaticTuple(*ref_list))
	299	Py_INCREF(ref_list)
	300	StaticTuple_SET_ITEM(ref_lists, loop_counter - 1, ref_list)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	301	# prepare for the next reference list
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	302	self._start = next_start
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	303	node_value = StaticTuple(value, ref_lists)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	304	else:
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	305	if last != self._start:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	306	# unexpected reference data present
4634.60.1 by John Arbash Meinel Don't return -1 directly, instead raise an exception.	307	raise AssertionError("unexpected reference data present")
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	308	node_value = StaticTuple(value, StaticTuple())
	309	PyList_Append(self.keys, StaticTuple(key, node_value))
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	310	return 0
	311
	312	def parse(self):
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	313	cdef Py_ssize_t byte_count
	314	if not PyString_CheckExact(self.bytes):
	315	raise AssertionError('self.bytes is not a string.')
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	316	byte_count = PyString_Size(self.bytes)
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	317	self._cur_str = PyString_AsString(self.bytes)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	318	# This points to the last character in the string
3641.3.2 by John Arbash Meinel Clean up some variable names, add some documentation.	319	self._end_str = self._cur_str + byte_count
	320	while self._cur_str < self._end_str:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	321	self.process_line()
	322	return self.keys
	323
	324
	325	def _parse_leaf_lines(bytes, key_length, ref_list_length):
	326	parser = BTreeLeafParser(bytes, key_length, ref_list_length)
	327	return parser.parse()
3641.3.18 by John Arbash Meinel Start working on a compiled function for transforming	328
	329
5365.5.2 by John Arbash Meinel Parse times are dramatically improved.	330	# TODO: We can go from 8 byte offset + 4 byte length to a simple lookup,
	331	# because the block_offset + length is likely to be repeated. However,
	332	# the big win there is to cache across pages, and not just one page
	333	# Though if we did cache in a page, we could certainly use a short int.
	334	# And this goes from 40 bytes to 30 bytes.
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	335	# One slightly ugly option would be to cache block offsets in a global.
	336	# However, that leads to thread-safety issues, etc.
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	337	ctypedef struct gc_chk_sha1_record:
5365.5.8 by John Arbash Meinel Add tests that we handle sizes close to and >2GB properly.	338	long long block_offset
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	339	unsigned int block_length
	340	unsigned int record_start
	341	unsigned int record_end
	342	char sha1[20]
	343
	344
	345	cdef int _unhexbuf[256]
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	346	cdef char *_hexbuf
	347	_hexbuf = '0123456789abcdef'
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	348
	349	cdef _populate_unhexbuf():
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	350	cdef int i
	351	for i from 0 <= i < 256:
	352	_unhexbuf[i] = -1
	353	for i from 0 <= i < 10: # 0123456789 => map to the raw number
	354	_unhexbuf[(i + c'0')] = i
	355	for i from 10 <= i < 16: # abcdef => 10, 11, 12, 13, 14, 15, 16
	356	_unhexbuf[(i - 10 + c'a')] = i
	357	for i from 10 <= i < 16: # ABCDEF => 10, 11, 12, 13, 14, 15, 16
	358	_unhexbuf[(i - 10 + c'A')] = i
	359	_populate_unhexbuf()
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	360
	361
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	362	cdef int _unhexlify_sha1(char as_hex, char as_bin): # cannot_raise
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	363	"""Take the hex sha1 in as_hex and make it binary in as_bin
	364
	365	Same as binascii.unhexlify, but working on C strings, not Python objects.
	366	"""
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	367	cdef int top
	368	cdef int bot
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	369	cdef int i, j
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	370	cdef char *cur
	371
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	372	# binascii does this using isupper() and tolower() and ?: syntax. I'm
	373	# guessing a simple lookup array should be faster.
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	374	j = 0
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	375	for i from 0 <= i < 20:
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	376	top = _unhexbuf[<unsigned char>(as_hex[j])]
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	377	j = j + 1
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	378	bot = _unhexbuf[<unsigned char>(as_hex[j])]
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	379	j = j + 1
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	380	if top == -1 or bot == -1:
	381	return 0
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	382	as_bin[i] = <unsigned char>((top << 4) + bot);
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	383	return 1
	384
	385
5365.5.24 by John Arbash Meinel Change the _test names to _get and _py so that it doesn't provoke bad smells :)	386	def _py_unhexlify(as_hex):
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	387	"""For the test infrastructure, just thunks to _unhexlify_sha1"""
	388	if len(as_hex) != 40 or not PyString_CheckExact(as_hex):
	389	raise ValueError('not a 40-byte hex digest')
	390	as_bin = PyString_FromStringAndSize(NULL, 20)
	391	if _unhexlify_sha1(PyString_AS_STRING(as_hex), PyString_AS_STRING(as_bin)):
	392	return as_bin
	393	return None
	394
	395
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	396	cdef void _hexlify_sha1(char as_bin, char as_hex): # cannot_raise
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	397	cdef int i, j
	398	cdef char c
	399
	400	j = 0
	401	for i from 0 <= i < 20:
	402	c = as_bin[i]
	403	as_hex[j] = _hexbuf[(c>>4)&0xf]
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	404	j = j + 1
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	405	as_hex[j] = _hexbuf[(c)&0xf]
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	406	j = j + 1
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	407
	408
5365.5.24 by John Arbash Meinel Change the _test names to _get and _py so that it doesn't provoke bad smells :)	409	def _py_hexlify(as_bin):
5365.5.4 by John Arbash Meinel Some direct tests of the hex and unhex functions.	410	"""For test infrastructure, thunk to _hexlify_sha1"""
	411	if len(as_bin) != 20 or not PyString_CheckExact(as_bin):
	412	raise ValueError('not a 20-byte binary digest')
	413	as_hex = PyString_FromStringAndSize(NULL, 40)
	414	_hexlify_sha1(PyString_AS_STRING(as_bin), PyString_AS_STRING(as_hex))
	415	return as_hex
	416
	417
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	418	cdef int _key_to_sha1(key, char *sha1): # cannot_raise
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	419	"""Map a key into its sha1 content.
	420
	421	:param key: A tuple of style ('sha1:abcd...',)
	422	:param sha1: A char buffer of 20 bytes
	423	:return: 1 if this could be converted, 0 otherwise
	424	"""
	425	cdef char *c_val
5365.5.11 by John Arbash Meinel get into the nitty gritty for the _key_to_sha1 function.	426	cdef PyObject *p_val
	427
	428	if StaticTuple_CheckExact(key) and StaticTuple_GET_SIZE(key) == 1:
	429	p_val = <PyObject *>StaticTuple_GET_ITEM(key, 0)
	430	elif (PyTuple_CheckExact(key) and PyTuple_GET_SIZE(key) == 1):
	431	p_val = PyTuple_GET_ITEM_ptr_object(key, 0)
	432	else:
	433	# Not a tuple or a StaticTuple
	434	return 0
	435	if (PyString_CheckExact_ptr(p_val) and PyString_GET_SIZE_ptr(p_val) == 45):
	436	c_val = PyString_AS_STRING_ptr(p_val)
	437	else:
	438	return 0
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	439	if strncmp(c_val, 'sha1:', 5) != 0:
	440	return 0
	441	if not _unhexlify_sha1(c_val + 5, sha1):
	442	return 0
	443	return 1
	444
	445
5365.5.24 by John Arbash Meinel Change the _test names to _get and _py so that it doesn't provoke bad smells :)	446	def _py_key_to_sha1(key):
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	447	"""Map a key to a simple sha1 string.
	448
	449	This is a testing thunk to the C function.
	450	"""
	451	as_bin_sha = PyString_FromStringAndSize(NULL, 20)
	452	if _key_to_sha1(key, PyString_AS_STRING(as_bin_sha)):
	453	return as_bin_sha
	454	return None
	455
	456
	457	cdef StaticTuple _sha1_to_key(char *sha1):
	458	"""Compute a ('sha1:abcd',) key for a given sha1."""
	459	cdef StaticTuple key
	460	cdef object hexxed
	461	cdef char *c_buf
	462	hexxed = PyString_FromStringAndSize(NULL, 45)
	463	c_buf = PyString_AS_STRING(hexxed)
	464	memcpy(c_buf, 'sha1:', 5)
	465	_hexlify_sha1(sha1, c_buf+5)
	466	key = StaticTuple_New(1)
	467	Py_INCREF(hexxed)
	468	StaticTuple_SET_ITEM(key, 0, hexxed)
5365.5.7 by John Arbash Meinel Document some perf decisions.	469	# This is a bit expensive. To parse 120 keys takes 48us, to return them all
	470	# can be done in 66.6us (so 18.6us to build them all).
	471	# Adding simple hash() here brings it to 76.6us (so computing the hash
	472	# value of 120keys is 10us), Intern is 86.9us (another 10us to look and add
	473	# them to the intern structure.)
	474	# However, since we only intern keys that are in active use, it is probably
	475	# a win. Since they would have been read from elsewhere anyway.
	476	# We could hang the PyObject form off of the gc_chk_sha1_record for ones
	477	# that we have deserialized. Something to think about, at least.
5365.5.6 by John Arbash Meinel For a single page test, we parse() still takes 47us	478	key = StaticTuple_Intern(key)
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	479	return key
	480
	481
5365.5.24 by John Arbash Meinel Change the _test names to _get and _py so that it doesn't provoke bad smells :)	482	def _py_sha1_to_key(sha1_bin):
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	483	"""Test thunk to check the sha1 mapping."""
	484	if not PyString_CheckExact(sha1_bin) or PyString_GET_SIZE(sha1_bin) != 20:
	485	raise ValueError('sha1_bin must be a str of exactly 20 bytes')
	486	return _sha1_to_key(PyString_AS_STRING(sha1_bin))
	487
	488
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	489	cdef unsigned int _sha1_to_uint(char *sha1): # cannot_raise
5365.5.13 by John Arbash Meinel Populate the offsets array.	490	cdef unsigned int val
	491	# Must be in MSB, because that is how the content is sorted
	492	val = (((<unsigned int>(sha1[0]) & 0xff) << 24)
	493	\| ((<unsigned int>(sha1[1]) & 0xff) << 16)
	494	\| ((<unsigned int>(sha1[2]) & 0xff) << 8)
	495	\| ((<unsigned int>(sha1[3]) & 0xff) << 0))
	496	return val
	497
	498
5365.5.28 by John Arbash Meinel Lots of compatibility changes for python2.4 and mingw32 compilers.	499	cdef _format_record(gc_chk_sha1_record *record):
	500	# This is inefficient to go from a logical state back to a
	501	# string, but it makes things work a bit better internally for now.
	502	if record.block_offset >= 0xFFFFFFFF:
	503	# %llu is what we really want, but unfortunately it was only added
	504	# in python 2.7... :(
	505	block_offset_str = str(record.block_offset)
5689.1.1 by Martin Use correct format character for unsigned int gc_chk_sha1_record members	506	value = PyString_FromFormat('%s %u %u %u',
5365.5.28 by John Arbash Meinel Lots of compatibility changes for python2.4 and mingw32 compilers.	507	PyString_AS_STRING(block_offset_str),
	508	record.block_length,
	509	record.record_start, record.record_end)
	510	else:
5689.1.1 by Martin Use correct format character for unsigned int gc_chk_sha1_record members	511	value = PyString_FromFormat('%lu %u %u %u',
5365.5.28 by John Arbash Meinel Lots of compatibility changes for python2.4 and mingw32 compilers.	512	<unsigned long>record.block_offset,
	513	record.block_length,
	514	record.record_start, record.record_end)
	515	return value
	516
	517
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	518	cdef class GCCHKSHA1LeafNode:
	519	"""Track all the entries for a given leaf node."""
	520
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	521	cdef gc_chk_sha1_record *records
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	522	cdef public object last_key
	523	cdef gc_chk_sha1_record *last_record
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	524	cdef public int num_records
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	525	# This is the number of bits to shift to get to the interesting byte. A
	526	# value of 24 means that the very first byte changes across all keys.
	527	# Anything else means that there is a common prefix of bits that we can
	528	# ignore. 0 means that at least the first 3 bytes are identical, though
	529	# that is going to be very rare
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	530	cdef public unsigned char common_shift
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	531	# This maps an interesting byte to the first record that matches.
	532	# Equivalent to bisect.bisect_left(self.records, sha1), though only taking
	533	# into account that one byte.
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	534	cdef unsigned char offsets[257]
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	535
	536	def __sizeof__(self):
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	537	# :( Why doesn't Pyrex let me do a simple sizeof(GCCHKSHA1LeafNode)
	538	# like Cython? Explicitly enumerating everything here seems to leave my
	539	# size off by 2 (286 bytes vs 288 bytes actual). I'm guessing it is an
	540	# alignment/padding issue. Oh well- at least we scale properly with
	541	# num_records and are very close to correct, which is what I care
	542	# about.
	543	# If we ever decide to require cython:
	544	# return (sizeof(GCCHKSHA1LeafNode)
	545	# + sizeof(gc_chk_sha1_record)*self.num_records)
	546	return (sizeof(PyObject) + sizeof(void*) + sizeof(int)
	547	+ sizeof(gc_chk_sha1_record) + sizeof(PyObject )
	548	+ sizeof(gc_chk_sha1_record*) + sizeof(char)
	549	+ sizeof(unsigned char)*257
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	550	+ sizeof(gc_chk_sha1_record)*self.num_records)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	551
	552	def __dealloc__(self):
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	553	if self.records != NULL:
	554	PyMem_Free(self.records)
	555	self.records = NULL
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	556
	557	def __init__(self, bytes):
	558	self._parse_bytes(bytes)
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	559	self.last_key = None
	560	self.last_record = NULL
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	561
	562	property min_key:
	563	def __get__(self):
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	564	if self.num_records > 0:
	565	return _sha1_to_key(self.records[0].sha1)
5365.5.2 by John Arbash Meinel Parse times are dramatically improved.	566	return None
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	567
	568	property max_key:
	569	def __get__(self):
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	570	if self.num_records > 0:
	571	return _sha1_to_key(self.records[self.num_records-1].sha1)
5365.5.2 by John Arbash Meinel Parse times are dramatically improved.	572	return None
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	573
	574	cdef StaticTuple _record_to_value_and_refs(self,
	575	gc_chk_sha1_record *record):
	576	"""Extract the refs and value part of this record."""
	577	cdef StaticTuple value_and_refs
	578	cdef StaticTuple empty
	579	value_and_refs = StaticTuple_New(2)
5848.2.1 by John Arbash Meinel Break compatibility with python <2.6.	580	value = _format_record(record)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	581	Py_INCREF(value)
	582	StaticTuple_SET_ITEM(value_and_refs, 0, value)
	583	# Always empty refs
	584	empty = StaticTuple_New(0)
	585	Py_INCREF(empty)
	586	StaticTuple_SET_ITEM(value_and_refs, 1, empty)
	587	return value_and_refs
	588
	589	cdef StaticTuple _record_to_item(self, gc_chk_sha1_record *record):
	590	"""Turn a given record back into a fully fledged item.
	591	"""
	592	cdef StaticTuple item
	593	cdef StaticTuple key
	594	cdef StaticTuple value_and_refs
	595	cdef object value
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	596	key = _sha1_to_key(record.sha1)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	597	item = StaticTuple_New(2)
	598	Py_INCREF(key)
	599	StaticTuple_SET_ITEM(item, 0, key)
	600	value_and_refs = self._record_to_value_and_refs(record)
	601	Py_INCREF(value_and_refs)
	602	StaticTuple_SET_ITEM(item, 1, value_and_refs)
	603	return item
	604
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	605	cdef gc_chk_sha1_record* _lookup_record(self, char *sha1) except? NULL:
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	606	"""Find a gc_chk_sha1_record that matches the sha1 supplied."""
5365.5.21 by John Arbash Meinel Remove counters, start the cleanup	607	cdef int lo, hi, mid, the_cmp
5365.5.13 by John Arbash Meinel Populate the offsets array.	608	cdef int offset
5365.5.9 by John Arbash Meinel Add some tests that key lookup works.	609
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	610	# TODO: We can speed up misses by comparing this sha1 to the common
	611	# bits, and seeing if the common prefix matches, if not, we don't
	612	# need to search for anything because it cannot match
	613	# Use the offset array to find the closest fit for this entry
	614	# follow that up with bisecting, since multiple keys can be in one
	615	# spot
	616	# Bisecting dropped us from 7000 comparisons to 582 (4.8/key), using
	617	# the offset array dropped us from 23us to 20us and 156 comparisions
	618	# (1.3/key)
5365.5.13 by John Arbash Meinel Populate the offsets array.	619	offset = self._offset_for_sha1(sha1)
	620	lo = self.offsets[offset]
	621	hi = self.offsets[offset+1]
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	622	if hi == 255:
	623	# if hi == 255 that means we potentially ran off the end of the
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	624	# list, so push it up to num_records
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	625	# note that if 'lo' == 255, that is ok, because we can start
	626	# searching from that part of the list.
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	627	hi = self.num_records
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	628	local_n_cmp = 0
5365.5.9 by John Arbash Meinel Add some tests that key lookup works.	629	while lo < hi:
	630	mid = (lo + hi) / 2
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	631	the_cmp = memcmp(self.records[mid].sha1, sha1, 20)
5365.5.9 by John Arbash Meinel Add some tests that key lookup works.	632	if the_cmp == 0:
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	633	return &self.records[mid]
5365.5.9 by John Arbash Meinel Add some tests that key lookup works.	634	elif the_cmp < 0:
5365.5.13 by John Arbash Meinel Populate the offsets array.	635	lo = mid + 1
5365.5.13 by John Arbash Meinel Populate the offsets array.	636	else:
5365.5.9 by John Arbash Meinel Add some tests that key lookup works.	637	hi = mid
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	638	return NULL
	639
	640	def __contains__(self, key):
	641	cdef char sha1[20]
	642	cdef gc_chk_sha1_record *record
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	643	if _key_to_sha1(key, sha1):
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	644	# If it isn't a sha1 key, then it won't be in this leaf node
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	645	record = self._lookup_record(sha1)
	646	if record != NULL:
	647	self.last_key = key
	648	self.last_record = record
	649	return True
	650	return False
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	651
	652	def __getitem__(self, key):
	653	cdef char sha1[20]
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	654	cdef gc_chk_sha1_record *record
	655	record = NULL
5365.5.19 by John Arbash Meinel Lots of instrumentation. Add a last-lookup cache.	656	if self.last_record != NULL and key is self.last_key:
	657	record = self.last_record
	658	elif _key_to_sha1(key, sha1):
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	659	record = self._lookup_record(sha1)
	660	if record == NULL:
	661	raise KeyError('key %r is not present' % (key,))
	662	return self._record_to_value_and_refs(record)
	663
	664	def __len__(self):
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	665	return self.num_records
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	666
	667	def all_keys(self):
	668	cdef int i
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	669	result = []
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	670	for i from 0 <= i < self.num_records:
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	671	PyList_Append(result, _sha1_to_key(self.records[i].sha1))
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	672	return result
	673
	674	def all_items(self):
	675	cdef int i
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	676	result = []
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	677	for i from 0 <= i < self.num_records:
	678	item = self._record_to_item(&self.records[i])
5365.5.22 by John Arbash Meinel Pyrexification. Rip out all the stuff that only Cython or new versions of pyrex understand.	679	PyList_Append(result, item)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	680	return result
	681
5365.5.29 by John Arbash Meinel Handle test_source and extensions. Also define an 'extern' protocol, to allow	682	cdef int _count_records(self, char c_content, char c_end): # cannot_raise
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	683	"""Count how many records are in this section."""
	684	cdef char *c_cur
	685	cdef int num_records
	686
	687	c_cur = c_content
	688	num_records = 0
	689	while c_cur != NULL and c_cur < c_end:
	690	c_cur = <char *>memchr(c_cur, c'\n', c_end - c_cur);
	691	if c_cur == NULL:
	692	break
	693	c_cur = c_cur + 1
	694	num_records = num_records + 1
	695	return num_records
	696
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	697	cdef _parse_bytes(self, bytes):
	698	"""Parse the string 'bytes' into content."""
	699	cdef char *c_bytes
	700	cdef char *c_cur
	701	cdef char *c_end
	702	cdef Py_ssize_t n_bytes
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	703	cdef int num_records
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	704	cdef int entry
	705	cdef gc_chk_sha1_record *cur_record
	706
	707	if not PyString_CheckExact(bytes):
	708	raise TypeError('We only support parsing plain 8-bit strings.')
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	709	# Pass 1, count how many records there will be
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	710	n_bytes = PyString_GET_SIZE(bytes)
	711	c_bytes = PyString_AS_STRING(bytes)
	712	c_end = c_bytes + n_bytes
	713	if strncmp(c_bytes, 'type=leaf\n', 10):
	714	raise ValueError("bytes did not start with 'type=leaf\\n': %r"
	715	% (bytes[:10],))
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	716	c_cur = c_bytes + 10
	717	num_records = self._count_records(c_cur, c_end)
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	718	# Now allocate the memory for these items, and go to town
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	719	self.records = <gc_chk_sha1_record>PyMem_Malloc(num_records
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	720	(sizeof(unsigned short) + sizeof(gc_chk_sha1_record)))
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	721	self.num_records = num_records
	722	cur_record = self.records
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	723	entry = 0
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	724	while c_cur != NULL and c_cur < c_end and entry < num_records:
5365.5.16 by John Arbash Meinel Trim the class a little bit.	725	c_cur = self._parse_one_entry(c_cur, c_end, cur_record)
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	726	cur_record = cur_record + 1
	727	entry = entry + 1
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	728	if (entry != self.num_records
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	729	or c_cur != c_end
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	730	or cur_record != self.records + self.num_records):
5365.5.5 by John Arbash Meinel Found some more bugs. I wasn't incrementing one of the pointers,	731	raise ValueError('Something went wrong while parsing.')
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	732	# Pass 3: build the offset map
5365.5.13 by John Arbash Meinel Populate the offsets array.	733	self._compute_common()
5365.5.13 by John Arbash Meinel Populate the offsets array.	734
5365.5.16 by John Arbash Meinel Trim the class a little bit.	735	cdef char _parse_one_entry(self, char c_cur, char *c_end,
	736	gc_chk_sha1_record *cur_record) except NULL:
	737	"""Read a single sha record from the bytes.
	738	:param c_cur: The pointer to the start of bytes
	739	:param cur_record:
	740	"""
	741	cdef char *c_next
	742	if strncmp(c_cur, 'sha1:', 5):
	743	raise ValueError('line did not start with sha1: %r'
	744	% (safe_string_from_size(c_cur, 10),))
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	745	c_cur = c_cur + 5
5365.5.16 by John Arbash Meinel Trim the class a little bit.	746	c_next = <char *>memchr(c_cur, c'\0', c_end - c_cur)
	747	if c_next == NULL or (c_next - c_cur != 40):
	748	raise ValueError('Line did not contain 40 hex bytes')
	749	if not _unhexlify_sha1(c_cur, cur_record.sha1):
	750	raise ValueError('We failed to unhexlify')
	751	c_cur = c_next + 1
	752	if c_cur[0] != c'\0':
	753	raise ValueError('only 1 null, not 2 as expected')
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	754	c_cur = c_cur + 1
5365.5.16 by John Arbash Meinel Trim the class a little bit.	755	cur_record.block_offset = strtoll(c_cur, &c_next, 10)
	756	if c_cur == c_next or c_next[0] != c' ':
	757	raise ValueError('Failed to parse block offset')
	758	c_cur = c_next + 1
	759	cur_record.block_length = strtoul(c_cur, &c_next, 10)
	760	if c_cur == c_next or c_next[0] != c' ':
	761	raise ValueError('Failed to parse block length')
	762	c_cur = c_next + 1
	763	cur_record.record_start = strtoul(c_cur, &c_next, 10)
	764	if c_cur == c_next or c_next[0] != c' ':
	765	raise ValueError('Failed to parse block length')
	766	c_cur = c_next + 1
	767	cur_record.record_end = strtoul(c_cur, &c_next, 10)
	768	if c_cur == c_next or c_next[0] != c'\n':
	769	raise ValueError('Failed to parse record end')
	770	c_cur = c_next + 1
	771	return c_cur
	772
5365.5.13 by John Arbash Meinel Populate the offsets array.	773	cdef int _offset_for_sha1(self, char *sha1) except -1:
	774	"""Find the first interesting 8-bits of this sha1."""
	775	cdef int this_offset
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	776	cdef unsigned int as_uint
	777	as_uint = _sha1_to_uint(sha1)
	778	this_offset = (as_uint >> self.common_shift) & 0xFF
5365.5.13 by John Arbash Meinel Populate the offsets array.	779	return this_offset
5365.5.13 by John Arbash Meinel Populate the offsets array.	780
5365.5.24 by John Arbash Meinel Change the _test names to _get and _py so that it doesn't provoke bad smells :)	781	def _get_offset_for_sha1(self, sha1):
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	782	return self._offset_for_sha1(PyString_AS_STRING(sha1))
	783
5365.5.13 by John Arbash Meinel Populate the offsets array.	784	cdef _compute_common(self):
	785	cdef unsigned int first
	786	cdef unsigned int this
	787	cdef unsigned int common_mask
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	788	cdef unsigned char common_shift
5365.5.13 by John Arbash Meinel Populate the offsets array.	789	cdef int i
5365.5.13 by John Arbash Meinel Populate the offsets array.	790	cdef int offset, this_offset
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	791	cdef int max_offset
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	792	# The idea with the offset map is that we should be able to quickly
	793	# jump to the key that matches a gives sha1. We know that the keys are
	794	# in sorted order, and we know that a lot of the prefix is going to be
	795	# the same across them.
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	796	# By XORing the records together, we can determine what bits are set in
5365.5.13 by John Arbash Meinel Populate the offsets array.	797	# all of them
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	798	if self.num_records < 2:
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	799	# Everything is in common if you have 0 or 1 leaves
	800	# So we'll always just shift to the first byte
	801	self.common_shift = 24
5365.5.13 by John Arbash Meinel Populate the offsets array.	802	else:
5365.5.13 by John Arbash Meinel Populate the offsets array.	803	common_mask = 0xFFFFFFFF
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	804	first = _sha1_to_uint(self.records[0].sha1)
	805	for i from 0 < i < self.num_records:
	806	this = _sha1_to_uint(self.records[i].sha1)
5365.5.13 by John Arbash Meinel Populate the offsets array.	807	common_mask = (~(first ^ this)) & common_mask
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	808	common_shift = 24
	809	while common_mask & 0x80000000 and common_shift > 0:
5365.5.13 by John Arbash Meinel Populate the offsets array.	810	common_mask = common_mask << 1
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	811	common_shift = common_shift - 1
5365.5.13 by John Arbash Meinel Populate the offsets array.	812	self.common_shift = common_shift
5365.5.13 by John Arbash Meinel Populate the offsets array.	813	offset = 0
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	814	max_offset = self.num_records
	815	# We cap this loop at 254 records. All the other offsets just get
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	816	# filled with 0xff as the singleton saying 'too many'.
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	817	# It means that if we have >255 records we have to bisect the second
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	818	# half of the list, but this is going to be very rare in practice.
	819	if max_offset > 255:
	820	max_offset = 255
	821	for i from 0 <= i < max_offset:
5365.5.17 by John Arbash Meinel rename 'entries' to 'records', some comment cleanups.	822	this_offset = self._offset_for_sha1(self.records[i].sha1)
5365.5.13 by John Arbash Meinel Populate the offsets array.	823	while offset <= this_offset:
5365.5.13 by John Arbash Meinel Populate the offsets array.	824	self.offsets[offset] = i
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	825	offset = offset + 1
5365.5.13 by John Arbash Meinel Populate the offsets array.	826	while offset < 257:
5365.5.15 by John Arbash Meinel Handle the case where there are more than 255 entries.	827	self.offsets[offset] = max_offset
5365.5.27 by John Arbash Meinel switch 'x += 1' to 'x = x + 1' to deal with brain-damaged old versions of pyrex.	828	offset = offset + 1
5365.5.13 by John Arbash Meinel Populate the offsets array.	829
5365.5.24 by John Arbash Meinel Change the _test names to _get and _py so that it doesn't provoke bad smells :)	830	def _get_offsets(self):
	831	cdef int i
	832	result = []
	833	for i from 0 <= i < 257:
	834	PyList_Append(result, self.offsets[i])
	835	return result
5365.5.1 by John Arbash Meinel Implement a custom parser for chk btree leaves.	836
	837
	838	def _parse_into_chk(bytes, key_length, ref_list_length):
	839	"""Parse into a format optimized for chk records."""
	840	assert key_length == 1
	841	assert ref_list_length == 0
	842	return GCCHKSHA1LeafNode(bytes)
	843
	844
3641.3.18 by John Arbash Meinel Start working on a compiled function for transforming	845	def _flatten_node(node, reference_lists):
	846	"""Convert a node into the serialized form.
	847
	848	:param node: A tuple representing a node:
	849	(index, key_tuple, value, references)
	850	:param reference_lists: Does this index have reference lists?
	851	:return: (string_key, flattened)
	852	string_key The serialized key for referencing this node
	853	flattened A string with the serialized form for the contents
	854	"""
3641.3.26 by John Arbash Meinel A couple small tweaks.	855	cdef int have_reference_lists
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	856	cdef Py_ssize_t flat_len
	857	cdef Py_ssize_t key_len
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	858	cdef Py_ssize_t node_len
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	859	cdef char * value
	860	cdef Py_ssize_t value_len
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	861	cdef char * out
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	862	cdef Py_ssize_t refs_len
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	863	cdef Py_ssize_t next_len
3641.3.21 by John Arbash Meinel Flatten the outermost str.join() into memcpy's	864	cdef int first_ref_list
3641.3.22 by John Arbash Meinel flatten the next level	865	cdef int first_reference
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	866	cdef int i
	867	cdef Py_ssize_t ref_bit_len
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	868
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	869	if not PyTuple_CheckExact(node) and not StaticTuple_CheckExact(node):
	870	raise TypeError('We expected a tuple() or StaticTuple() for node not: %s'
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	871	% type(node))
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	872	node_len = len(node)
3641.3.26 by John Arbash Meinel A couple small tweaks.	873	have_reference_lists = reference_lists
3641.3.26 by John Arbash Meinel A couple small tweaks.	874	if have_reference_lists:
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	875	if node_len != 4:
	876	raise ValueError('With ref_lists, we expected 4 entries not: %s'
	877	% len(node))
	878	elif node_len < 3:
	879	raise ValueError('Without ref_lists, we need at least 3 entries not: %s'
	880	% len(node))
4679.8.2 by John Arbash Meinel A bit of tweaking to _flatten_node.	881	# TODO: We can probably do better than string.join(), namely
	882	# when key has only 1 item, we can just grab that string
	883	# And when there are 2 items, we could do a single malloc + len() + 1
	884	# also, doing .join() requires a PyObject_GetAttrString call, which
	885	# we could also avoid.
4679.8.4 by John Arbash Meinel Comment about compiled code quality.	886	# TODO: Note that pyrex 0.9.6 generates fairly crummy code here, using the
	887	# python object interface, versus 0.9.8+ which uses a helper that
	888	# checks if this supports the sequence interface.
	889	# We could do more work on our own, and grab the actual items
	890	# lists. For now, just ask people to use a better compiler. :)
4679.8.2 by John Arbash Meinel A bit of tweaking to _flatten_node.	891	string_key = '\0'.join(node[1])
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	892
3641.3.18 by John Arbash Meinel Start working on a compiled function for transforming	893	# TODO: instead of using string joins, precompute the final string length,
	894	# and then malloc a single string and copy everything in.
3641.3.19 by John Arbash Meinel The flatten code now handles the no-ref-list case.	895
	896	# TODO: We probably want to use PySequenceFast, because we have lists and
	897	# tuples, but we aren't sure which we will get.
	898
	899	# line := string_key NULL flat_refs NULL value LF
	900	# string_key := BYTES (NULL BYTES)*
	901	# flat_refs := ref_list (TAB ref_list)*
	902	# ref_list := ref (CR ref)*
	903	# ref := BYTES (NULL BYTES)*
	904	# value := BYTES
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	905	refs_len = 0
3641.3.26 by John Arbash Meinel A couple small tweaks.	906	if have_reference_lists:
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	907	# Figure out how many bytes it will take to store the references
4679.8.2 by John Arbash Meinel A bit of tweaking to _flatten_node.	908	ref_lists = node[3]
3641.3.24 by John Arbash Meinel Use the compiled flatten function.	909	next_len = len(ref_lists) # TODO: use a Py function
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	910	if next_len > 0:
	911	# If there are no nodes, we don't need to do any work
	912	# Otherwise we will need (len - 1) '\t' characters to separate
	913	# the reference lists
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	914	refs_len = refs_len + (next_len - 1)
3641.3.24 by John Arbash Meinel Use the compiled flatten function.	915	for ref_list in ref_lists:
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	916	next_len = len(ref_list)
	917	if next_len > 0:
	918	# We will need (len - 1) '\r' characters to separate the
	919	# references
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	920	refs_len = refs_len + (next_len - 1)
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	921	for reference in ref_list:
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	922	if (not PyTuple_CheckExact(reference)
	923	and not StaticTuple_CheckExact(reference)):
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	924	raise TypeError(
	925	'We expect references to be tuples not: %s'
	926	% type(reference))
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	927	next_len = len(reference)
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	928	if next_len > 0:
	929	# We will need (len - 1) '\x00' characters to
	930	# separate the reference key
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	931	refs_len = refs_len + (next_len - 1)
4679.8.2 by John Arbash Meinel A bit of tweaking to _flatten_node.	932	for ref_bit in reference:
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	933	if not PyString_CheckExact(ref_bit):
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	934	raise TypeError('We expect reference bits'
	935	' to be strings not: %s'
	936	% type(<object>ref_bit))
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	937	refs_len = refs_len + PyString_GET_SIZE(ref_bit)
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	938
	939	# So we have the (key NULL refs NULL value LF)
	940	key_len = PyString_Size(string_key)
4679.8.2 by John Arbash Meinel A bit of tweaking to _flatten_node.	941	val = node[2]
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	942	if not PyString_CheckExact(val):
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	943	raise TypeError('Expected a plain str for value not: %s'
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	944	% type(val))
	945	value = PyString_AS_STRING(val)
	946	value_len = PyString_GET_SIZE(val)
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	947	flat_len = (key_len + 1 + refs_len + 1 + value_len + 1)
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	948	line = PyString_FromStringAndSize(NULL, flat_len)
	949	# Get a pointer to the new buffer
	950	out = PyString_AsString(line)
	951	memcpy(out, PyString_AsString(string_key), key_len)
	952	out = out + key_len
	953	out[0] = c'\0'
	954	out = out + 1
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	955	if refs_len > 0:
3641.3.21 by John Arbash Meinel Flatten the outermost str.join() into memcpy's	956	first_ref_list = 1
3641.3.24 by John Arbash Meinel Use the compiled flatten function.	957	for ref_list in ref_lists:
3641.3.21 by John Arbash Meinel Flatten the outermost str.join() into memcpy's	958	if first_ref_list == 0:
	959	out[0] = c'\t'
	960	out = out + 1
	961	first_ref_list = 0
3641.3.22 by John Arbash Meinel flatten the next level	962	first_reference = 1
3641.3.21 by John Arbash Meinel Flatten the outermost str.join() into memcpy's	963	for reference in ref_list:
3641.3.22 by John Arbash Meinel flatten the next level	964	if first_reference == 0:
	965	out[0] = c'\r'
	966	out = out + 1
	967	first_reference = 0
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	968	next_len = len(reference)
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	969	for i from 0 <= i < next_len:
	970	if i != 0:
3641.3.23 by John Arbash Meinel Flattened all the way down the stack.	971	out[0] = c'\x00'
	972	out = out + 1
4679.8.2 by John Arbash Meinel A bit of tweaking to _flatten_node.	973	ref_bit = reference[i]
4679.7.1 by John Arbash Meinel Merge the 2.1-static-tuple-no-use branch, but restore the	974	ref_bit_len = PyString_GET_SIZE(ref_bit)
	975	memcpy(out, PyString_AS_STRING(ref_bit), ref_bit_len)
3641.3.25 by John Arbash Meinel Shave off some more time by using exact accessors.	976	out = out + ref_bit_len
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	977	out[0] = c'\0'
3641.3.22 by John Arbash Meinel flatten the next level	978	out = out + 1
3641.3.20 by John Arbash Meinel We have a single malloc for the final output.	979	memcpy(out, value, value_len)
	980	out = out + value_len
	981	out[0] = c'\n'
3641.3.18 by John Arbash Meinel Start working on a compiled function for transforming	982	return string_key, line