~bzr-pqm/bzr/bzr.dev : contents of bzrlib/compiled/dirstate

~bzr-pqm/bzr/bzr.dev : (revision 2474.1.7)

# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Helper functions for DirState.

This is the python implementation for DirState functions.
"""

from bzrlib.dirstate import DirState


cdef extern from *:
    ctypedef int size_t

cdef extern from "Python.h":
    # GetItem returns a borrowed reference
    void *PyDict_GetItem(object p, object key)
    int PyDict_SetItem(object p, object key, object val) except -1
    object PyList_GetItem(object lst, int index)
    object PyTuple_GetItem(object tpl, int index)

    char* PyString_AsString(object p)
    int PyString_Size(object p)


cdef extern from "string.h":
    int strncmp(char *s1, char *s2, size_t len)
    int strcmp(char *s1, char *s2)
    char *strchr(char *s1, char c)


cdef object _split_from_path(object cache, object path):
    """get the dirblock tuple for a given path.

    :param cache: A Dictionary mapping string paths to tuples
    :param path: The path we care about.
    :return: A borrowed reference to a tuple stored in cache.
        You do not need to Py_DECREF() when you are done, unless you plan on
        using it for a while.
    """
    cdef void* value_ptr
    cdef object value

    value_ptr = PyDict_GetItem(cache, path)
    if value_ptr == NULL:
        value = path.split('/')
        PyDict_SetItem(cache, path, value)
    else:
        value = <object>value_ptr

    return value


cdef int _bisect_dirblock_nocache(object dirblocks, object dirname, int _lo, int _hi):
    cdef int _mid
    cdef object cur
    cdef object cur_split
    cdef object dirname_split

    dirname_split = dirname.split('/')

    while _lo < _hi:
        _mid = (_lo+_hi)/2
        # Grab the dirname for the current dirblock
        cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0)
        cur_split = cur.split('/')
        if cur_split < dirname_split: _lo = _mid+1
        else: _hi = _mid
    return _lo


cdef int _cmp_dirblock_strings(char *path1, int size1, char *path2, int size2):
    """This compares 2 strings separating on path sections.

    This is equivalent to "cmp(path1.split('/'), path2.split('/'))"
    However, we don't want to create an extra object for doing the split.

    :param path1: The first path to compare
    :param size1: The length of the first path
    :param path2: The second path
    :param size1: The length of the second path
    :return: 0 if they are equal, -1 if path1 comes first, 1 if path2 comes
        first
    """
    cdef char *base1
    cdef char *base2
    cdef char *tip1
    cdef char *tip2
    cdef char *end1
    cdef char *end2
    cdef int cur_len1
    cdef int cur_len2
    cdef int cmp_len
    cdef int diff

    base1 = path1
    base2 = path2
    end1 = base1 + size1
    end2 = base2 + size2

    # Ensure that we are pointing to the final NULL terminator on both ends
    assert end1[0] == c'\x00'
    assert end2[0] == c'\x00'

    while base1 < end1 and base2 < end2:
        # Find the next path separator
        # (This is where you would like strchrnul)
        tip1 = strchr(base1, c'/')
        tip2 = strchr(base2, c'/')

        if tip1 == NULL:
            tip1 = end1
        if tip2 == NULL:
            tip2 = end2

        cur_len1 = tip1 - base1
        cur_len2 = tip2 - base2
        cmp_len = cur_len1
        if cur_len2 < cur_len1:
            cmp_len = cur_len2

        diff = strncmp(base1, base2, cmp_len)
        # print 'comparing "%s", "%s", %d = %d' % (base1, base2, cmp_len, diff)
        if diff != 0:
            return diff
        if cur_len1 < cur_len2:
            return -1
        elif cur_len1 > cur_len2:
            return 1
        base1 = tip1+1
        base2 = tip2+1
    # Do we still have uncompared characters?
    if base1 < end1:
        return 1
    if base2 < end2:
        return -1
    return 0


def cmp_dirblock_strings(path1, path2):
    """Compare to python strings in dirblock fashion."""
    return _cmp_dirblock_strings(PyString_AsString(path1),
                                 PyString_Size(path1),
                                 PyString_AsString(path2),
                                 PyString_Size(path2))


def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directories blocks in dirblock[:idx]
    have names < dirname, and all blocks in dirblock[idx:] have names >=
    dirname.

    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
    slice of a to be searched.
    """
    cdef int _lo
    cdef int _hi
    cdef int _mid
    cdef object dirname_split
    cdef object cur_split

    if hi is None:
        _hi = len(dirblocks)
    else:
        _hi = hi

    _lo = lo
    if cache is None:
        return _bisect_dirblock_nocache(dirblocks, dirname, _lo, _hi)

    dirname_split = _split_from_path(cache, dirname)
    while _lo < _hi:
        _mid = (_lo+_hi)/2
        # Grab the dirname for the current dirblock
        cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0)
        cur_split = _split_from_path(cache, cur)
        if cur_split < dirname_split: _lo = _mid+1
        else: _hi = _mid
    return _lo


def _read_dirblocks(state):
    """Read in the dirblocks for the given DirState object.

    This is tightly bound to the DirState internal representation. It should be
    thought of as a member function, which is only separated out so that we can
    re-write it in pyrex.

    :param state: A DirState object.
    :return: None
    """
    cdef int pos
    cdef int entry_size
    cdef int field_count

    state._state_file.seek(state._end_of_header)
    text = state._state_file.read()
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)

    fields = text.split('\0')
    # Remove the last blank entry
    trailing = fields.pop()
    assert trailing == ''
    # consider turning fields into a tuple.

    # skip the first field which is the trailing null from the header.
    cur = 1
    # Each line now has an extra '\n' field which is not used
    # so we just skip over it
    # entry size:
    #  3 fields for the key
    #  + number of fields per tree_data (5) * tree count
    #  + newline
    num_present_parents = state._num_present_parents()
    tree_count = 1 + num_present_parents
    entry_size = state._fields_per_entry()
    expected_field_count = entry_size * state._num_entries
    field_count = len(fields)
    # this checks our adjustment, and also catches file too short.
    assert field_count - cur == expected_field_count, \
        'field count incorrect %s != %s, entry_size=%s, '\
        'num_entries=%s fields=%r' % (
            field_count - cur, expected_field_count, entry_size,
            state._num_entries, fields)

    if num_present_parents == 1:
        # Bind external functions to local names
        _int = int
        # We access all fields in order, so we can just iterate over
        # them. Grab an straight iterator over the fields. (We use an
        # iterator because we don't want to do a lot of additions, nor
        # do we want to do a lot of slicing)
        next = iter(fields).next
        # Move the iterator to the current position
        for x in xrange(cur):
            next()
        # The two blocks here are deliberate: the root block and the
        # contents-of-root block.
        state._dirblocks = [('', []), ('', [])]
        current_block = state._dirblocks[0][1]
        current_dirname = ''
        append_entry = current_block.append
        for count in xrange(state._num_entries):
            dirname = next()
            name = next()
            file_id = next()
            if dirname != current_dirname:
                # new block - different dirname
                current_block = []
                current_dirname = dirname
                state._dirblocks.append((current_dirname, current_block))
                append_entry = current_block.append
            # we know current_dirname == dirname, so re-use it to avoid
            # creating new strings
            entry = ((current_dirname, name, file_id),
                     [(# Current Tree
                         next(),                # minikind
                         next(),                # fingerprint
                         _int(next()),          # size
                         next() == 'y',         # executable
                         next(),                # packed_stat or revision_id
                     ),
                     ( # Parent 1
                         next(),                # minikind
                         next(),                # fingerprint
                         _int(next()),          # size
                         next() == 'y',         # executable
                         next(),                # packed_stat or revision_id
                     ),
                     ])
            trailing = next()
            assert trailing == '\n'
            # append the entry to the current block
            append_entry(entry)
        state._split_root_dirblock_into_contents()
    else:

        fields_to_entry = state._get_fields_to_entry()
        entries = []
        entries_append = entries.append
        pos = cur
        entry_size = entry_size
        while pos < field_count:
            entries_append(fields_to_entry(fields[pos:pos+entry_size]))
            pos = pos + entry_size
        state._entries_to_current_state(entries)
    # To convert from format 2  => format 3
    # state._dirblocks = sorted(state._dirblocks,
    #                          key=lambda blk:blk[0].split('/'))
    # To convert from format 3 => format 2
    # state._dirblocks = sorted(state._dirblocks)
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

2474.1.1 by John Arbash Meinel Create a Pyrex extension for reading the dirstate file.	1	# Copyright (C) 2007 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	"""Helper functions for DirState.
	18
	19	This is the python implementation for DirState functions.
	20	"""
	21
	22	from bzrlib.dirstate import DirState
	23
	24
2474.1.7 by John Arbash Meinel Add some tests for a helper function that lets us	25	cdef extern from *:
	26	ctypedef int size_t
	27
2474.1.4 by John Arbash Meinel Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.	28	cdef extern from "Python.h":
	29	# GetItem returns a borrowed reference
	30	void *PyDict_GetItem(object p, object key)
	31	int PyDict_SetItem(object p, object key, object val) except -1
	32	object PyList_GetItem(object lst, int index)
	33	object PyTuple_GetItem(object tpl, int index)
	34
2474.1.7 by John Arbash Meinel Add some tests for a helper function that lets us	35	char* PyString_AsString(object p)
	36	int PyString_Size(object p)
	37
	38
	39	cdef extern from "string.h":
	40	int strncmp(char s1, char s2, size_t len)
	41	int strcmp(char s1, char s2)
	42	char strchr(char s1, char c)
	43
2474.1.4 by John Arbash Meinel Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.	44
	45	cdef object _split_from_path(object cache, object path):
	46	"""get the dirblock tuple for a given path.
	47
	48	:param cache: A Dictionary mapping string paths to tuples
	49	:param path: The path we care about.
	50	:return: A borrowed reference to a tuple stored in cache.
	51	You do not need to Py_DECREF() when you are done, unless you plan on
	52	using it for a while.
	53	"""
	54	cdef void* value_ptr
	55	cdef object value
	56
	57	value_ptr = PyDict_GetItem(cache, path)
	58	if value_ptr == NULL:
	59	value = path.split('/')
	60	PyDict_SetItem(cache, path, value)
	61	else:
	62	value = <object>value_ptr
	63
	64	return value
	65
	66
2474.1.5 by John Arbash Meinel Implement explicit handling of the no-cache version, which is even faster.	67	cdef int _bisect_dirblock_nocache(object dirblocks, object dirname, int _lo, int _hi):
	68	cdef int _mid
	69	cdef object cur
	70	cdef object cur_split
	71	cdef object dirname_split
	72
	73	dirname_split = dirname.split('/')
	74
	75	while _lo < _hi:
	76	_mid = (_lo+_hi)/2
	77	# Grab the dirname for the current dirblock
	78	cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0)
	79	cur_split = cur.split('/')
	80	if cur_split < dirname_split: _lo = _mid+1
	81	else: _hi = _mid
	82	return _lo
	83
	84
2474.1.7 by John Arbash Meinel Add some tests for a helper function that lets us	85	cdef int _cmp_dirblock_strings(char path1, int size1, char path2, int size2):
	86	"""This compares 2 strings separating on path sections.
	87
	88	This is equivalent to "cmp(path1.split('/'), path2.split('/'))"
	89	However, we don't want to create an extra object for doing the split.
	90
	91	:param path1: The first path to compare
	92	:param size1: The length of the first path
	93	:param path2: The second path
	94	:param size1: The length of the second path
	95	:return: 0 if they are equal, -1 if path1 comes first, 1 if path2 comes
	96	first
	97	"""
	98	cdef char *base1
	99	cdef char *base2
	100	cdef char *tip1
	101	cdef char *tip2
	102	cdef char *end1
	103	cdef char *end2
	104	cdef int cur_len1
	105	cdef int cur_len2
	106	cdef int cmp_len
	107	cdef int diff
	108
	109	base1 = path1
	110	base2 = path2
	111	end1 = base1 + size1
	112	end2 = base2 + size2
	113
	114	# Ensure that we are pointing to the final NULL terminator on both ends
	115	assert end1[0] == c'\x00'
	116	assert end2[0] == c'\x00'
	117
	118	while base1 < end1 and base2 < end2:
	119	# Find the next path separator
	120	# (This is where you would like strchrnul)
	121	tip1 = strchr(base1, c'/')
	122	tip2 = strchr(base2, c'/')
	123
	124	if tip1 == NULL:
	125	tip1 = end1
	126	if tip2 == NULL:
	127	tip2 = end2
	128
	129	cur_len1 = tip1 - base1
	130	cur_len2 = tip2 - base2
	131	cmp_len = cur_len1
	132	if cur_len2 < cur_len1:
	133	cmp_len = cur_len2
	134
	135	diff = strncmp(base1, base2, cmp_len)
	136	# print 'comparing "%s", "%s", %d = %d' % (base1, base2, cmp_len, diff)
	137	if diff != 0:
	138	return diff
	139	if cur_len1 < cur_len2:
	140	return -1
	141	elif cur_len1 > cur_len2:
	142	return 1
	143	base1 = tip1+1
	144	base2 = tip2+1
	145	# Do we still have uncompared characters?
	146	if base1 < end1:
	147	return 1
	148	if base2 < end2:
149	return -1
150	return 0
151
152
153	def cmp_dirblock_strings(path1, path2):
154	"""Compare to python strings in dirblock fashion."""
155	return _cmp_dirblock_strings(PyString_AsString(path1),
156	PyString_Size(path1),
157	PyString_AsString(path2),
158	PyString_Size(path2))
159
160
2474.1.5 by John Arbash Meinel Implement explicit handling of the no-cache version, which is even faster.	161	def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
2474.1.4 by John Arbash Meinel Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.	162	"""Return the index where to insert dirname into the dirblocks.
	163
	164	The return value idx is such that all directories blocks in dirblock[:idx]
	165	have names < dirname, and all blocks in dirblock[idx:] have names >=
	166	dirname.
	167
	168	Optional args lo (default 0) and hi (default len(dirblocks)) bound the
	169	slice of a to be searched.
	170	"""
	171	cdef int _lo
	172	cdef int _hi
	173	cdef int _mid
	174	cdef object dirname_split
	175	cdef object cur_split
	176
	177	if hi is None:
	178	_hi = len(dirblocks)
	179	else:
	180	_hi = hi
	181
	182	_lo = lo
2474.1.5 by John Arbash Meinel Implement explicit handling of the no-cache version, which is even faster.	183	if cache is None:
	184	return _bisect_dirblock_nocache(dirblocks, dirname, _lo, _hi)
	185
2474.1.4 by John Arbash Meinel Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.	186	dirname_split = _split_from_path(cache, dirname)
	187	while _lo < _hi:
	188	_mid = (_lo+_hi)/2
	189	# Grab the dirname for the current dirblock
	190	cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0)
	191	cur_split = _split_from_path(cache, cur)
	192	if cur_split < dirname_split: _lo = _mid+1
	193	else: _hi = _mid
	194	return _lo
	195
	196
2474.1.1 by John Arbash Meinel Create a Pyrex extension for reading the dirstate file.	197	def _read_dirblocks(state):
	198	"""Read in the dirblocks for the given DirState object.
	199
	200	This is tightly bound to the DirState internal representation. It should be
	201	thought of as a member function, which is only separated out so that we can
	202	re-write it in pyrex.
	203
	204	:param state: A DirState object.
	205	:return: None
	206	"""
	207	cdef int pos
	208	cdef int entry_size
	209	cdef int field_count
	210
	211	state._state_file.seek(state._end_of_header)
	212	text = state._state_file.read()
	213	# TODO: check the crc checksums. crc_measured = zlib.crc32(text)
	214
	215	fields = text.split('\0')
	216	# Remove the last blank entry
	217	trailing = fields.pop()
	218	assert trailing == ''
	219	# consider turning fields into a tuple.
	220
	221	# skip the first field which is the trailing null from the header.
	222	cur = 1
	223	# Each line now has an extra '\n' field which is not used
	224	# so we just skip over it
	225	# entry size:
	226	# 3 fields for the key
	227	# + number of fields per tree_data (5) * tree count
	228	# + newline
	229	num_present_parents = state._num_present_parents()
	230	tree_count = 1 + num_present_parents
	231	entry_size = state._fields_per_entry()
	232	expected_field_count = entry_size * state._num_entries
	233	field_count = len(fields)
	234	# this checks our adjustment, and also catches file too short.
	235	assert field_count - cur == expected_field_count, \
	236	'field count incorrect %s != %s, entry_size=%s, '\
	237	'num_entries=%s fields=%r' % (
	238	field_count - cur, expected_field_count, entry_size,
	239	state._num_entries, fields)
	240
	241	if num_present_parents == 1:
	242	# Bind external functions to local names
	243	_int = int
	244	# We access all fields in order, so we can just iterate over
	245	# them. Grab an straight iterator over the fields. (We use an
	246	# iterator because we don't want to do a lot of additions, nor
	247	# do we want to do a lot of slicing)
	248	next = iter(fields).next
	249	# Move the iterator to the current position
	250	for x in xrange(cur):
	251	next()
	252	# The two blocks here are deliberate: the root block and the
	253	# contents-of-root block.
	254	state._dirblocks = [('', []), ('', [])]
	255	current_block = state._dirblocks[0][1]
	256	current_dirname = ''
	257	append_entry = current_block.append
	258	for count in xrange(state._num_entries):
	259	dirname = next()
	260	name = next()
261	file_id = next()
262	if dirname != current_dirname:
263	# new block - different dirname
264	current_block = []
265	current_dirname = dirname
266	state._dirblocks.append((current_dirname, current_block))
267	append_entry = current_block.append
268	# we know current_dirname == dirname, so re-use it to avoid
269	# creating new strings
270	entry = ((current_dirname, name, file_id),
271	[(# Current Tree
272	next(), # minikind
273	next(), # fingerprint
274	_int(next()), # size
275	next() == 'y', # executable
276	next(), # packed_stat or revision_id
277	),
278	( # Parent 1
279	next(), # minikind
280	next(), # fingerprint
281	_int(next()), # size
282	next() == 'y', # executable
283	next(), # packed_stat or revision_id
284	),
285	])
286	trailing = next()
287	assert trailing == '\n'
288	# append the entry to the current block
289	append_entry(entry)
290	state._split_root_dirblock_into_contents()
291	else:
292
293	fields_to_entry = state._get_fields_to_entry()
294	entries = []
295	entries_append = entries.append
296	pos = cur
297	entry_size = entry_size
298	while pos < field_count:
299	entries_append(fields_to_entry(fields[pos:pos+entry_size]))
300	pos = pos + entry_size
301	state._entries_to_current_state(entries)
302	# To convert from format 2 => format 3
303	# state._dirblocks = sorted(state._dirblocks,
304	# key=lambda blk:blk[0].split('/'))
305	# To convert from format 3 => format 2
306	# state._dirblocks = sorted(state._dirblocks)
307	state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED