~bzr-pqm/bzr/bzr.dev : contents of bzrlib/_dirstate_helpers

~bzr-pqm/bzr/bzr.dev : (revision 6338)
# Copyright (C) 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Python implementations of Dirstate Helper functions."""

import binascii
import os
import struct

# We cannot import the dirstate module, because it loads this module
# All we really need is the IN_MEMORY_MODIFIED constant
from bzrlib import errors
from bzrlib.dirstate import DirState


def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack):
    """Convert stat values into a packed representation

    Not all of the fields from the stat included are strictly needed, and by
    just encoding the mtime and mode a slight speed increase could be gained.
    However, using the pyrex version instead is a bigger win.
    """
    # base64 encoding always adds a final newline, so strip it off
    return _b64(_pack(st.st_size & 0xFFFFFFFF, int(st.st_mtime) & 0xFFFFFFFF,
        int(st.st_ctime) & 0xFFFFFFFF, st.st_dev & 0xFFFFFFFF,
        st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]


def _unpack_stat(packed_stat):
    """Turn a packed_stat back into the stat fields.

    This is meant as a debugging tool, should not be used in real code.
    """
    (st_size, st_mtime, st_ctime, st_dev, st_ino,
     st_mode) = struct.unpack('>6L', binascii.a2b_base64(packed_stat))
    return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime,
                st_dev=st_dev, st_ino=st_ino, st_mode=st_mode)


def _bisect_path_left(paths, path):
    """Return the index where to insert path into paths.

    This uses the dirblock sorting. So all children in a directory come before
    the children of children. For example::

        a/
          b/
            c
          d/
            e
          b-c
          d-e
        a-a
        a=c

    Will be sorted as::

        a
        a-a
        a=c
        a/b
        a/b-c
        a/d
        a/d-e
        a/b/c
        a/d/e

    :param paths: A list of paths to search through
    :param path: A single path to insert
    :return: An offset where 'path' can be inserted.
    :seealso: bisect.bisect_left
    """
    hi = len(paths)
    lo = 0
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = paths[mid]
        if _cmp_path_by_dirblock(cur, path) < 0:
            lo = mid + 1
        else:
            hi = mid
    return lo


def _bisect_path_right(paths, path):
    """Return the index where to insert path into paths.

    This uses a path-wise comparison so we get::
        a
        a-b
        a=b
        a/b
    Rather than::
        a
        a-b
        a/b
        a=b
    :param paths: A list of paths to search through
    :param path: A single path to insert
    :return: An offset where 'path' can be inserted.
    :seealso: bisect.bisect_right
    """
    hi = len(paths)
    lo = 0
    while lo < hi:
        mid = (lo+hi)//2
        # Grab the dirname for the current dirblock
        cur = paths[mid]
        if _cmp_path_by_dirblock(path, cur) < 0:
            hi = mid
        else:
            lo = mid + 1
    return lo


def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directories blocks in dirblock[:idx]
    have names < dirname, and all blocks in dirblock[idx:] have names >=
    dirname.

    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
    slice of a to be searched.
    """
    if hi is None:
        hi = len(dirblocks)
    try:
        dirname_split = cache[dirname]
    except KeyError:
        dirname_split = dirname.split('/')
        cache[dirname] = dirname_split
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = dirblocks[mid][0]
        try:
            cur_split = cache[cur]
        except KeyError:
            cur_split = cur.split('/')
            cache[cur] = cur_split
        if cur_split < dirname_split: lo = mid + 1
        else: hi = mid
    return lo


def cmp_by_dirs(path1, path2):
    """Compare two paths directory by directory.

    This is equivalent to doing::

       cmp(path1.split('/'), path2.split('/'))

    The idea is that you should compare path components separately. This
    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.

    :param path1: first path
    :param path2: second path
    :return: negative number if ``path1`` comes first,
        0 if paths are equal,
        and positive number if ``path2`` sorts first
    """
    if not isinstance(path1, str):
        raise TypeError("'path1' must be a plain string, not %s: %r"
                        % (type(path1), path1))
    if not isinstance(path2, str):
        raise TypeError("'path2' must be a plain string, not %s: %r"
                        % (type(path2), path2))
    return cmp(path1.split('/'), path2.split('/'))


def _cmp_path_by_dirblock(path1, path2):
    """Compare two paths based on what directory they are in.

    This generates a sort order, such that all children of a directory are
    sorted together, and grandchildren are in the same order as the
    children appear. But all grandchildren come after all children.

    :param path1: first path
    :param path2: the second path
    :return: negative number if ``path1`` comes first,
        0 if paths are equal
        and a positive number if ``path2`` sorts first
    """
    if not isinstance(path1, str):
        raise TypeError("'path1' must be a plain string, not %s: %r"
                        % (type(path1), path1))
    if not isinstance(path2, str):
        raise TypeError("'path2' must be a plain string, not %s: %r"
                        % (type(path2), path2))
    dirname1, basename1 = os.path.split(path1)
    key1 = (dirname1.split('/'), basename1)
    dirname2, basename2 = os.path.split(path2)
    key2 = (dirname2.split('/'), basename2)
    return cmp(key1, key2)


def _read_dirblocks(state):
    """Read in the dirblocks for the given DirState object.

    This is tightly bound to the DirState internal representation. It should be
    thought of as a member function, which is only separated out so that we can
    re-write it in pyrex.

    :param state: A DirState object.
    :return: None
    """
    state._state_file.seek(state._end_of_header)
    text = state._state_file.read()
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)

    fields = text.split('\0')
    # Remove the last blank entry
    trailing = fields.pop()
    if trailing != '':
        raise errors.DirstateCorrupt(state,
            'trailing garbage: %r' % (trailing,))
    # consider turning fields into a tuple.

    # skip the first field which is the trailing null from the header.
    cur = 1
    # Each line now has an extra '\n' field which is not used
    # so we just skip over it
    # entry size:
    #  3 fields for the key
    #  + number of fields per tree_data (5) * tree count
    #  + newline
    num_present_parents = state._num_present_parents()
    tree_count = 1 + num_present_parents
    entry_size = state._fields_per_entry()
    expected_field_count = entry_size * state._num_entries
    field_count = len(fields)
    # this checks our adjustment, and also catches file too short.
    if field_count - cur != expected_field_count:
        raise errors.DirstateCorrupt(state,
            'field count incorrect %s != %s, entry_size=%s, '\
            'num_entries=%s fields=%r' % (
            field_count - cur, expected_field_count, entry_size,
            state._num_entries, fields))

    if num_present_parents == 1:
        # Bind external functions to local names
        _int = int
        # We access all fields in order, so we can just iterate over
        # them. Grab an straight iterator over the fields. (We use an
        # iterator because we don't want to do a lot of additions, nor
        # do we want to do a lot of slicing)
        next = iter(fields).next
        # Move the iterator to the current position
        for x in xrange(cur):
            next()
        # The two blocks here are deliberate: the root block and the
        # contents-of-root block.
        state._dirblocks = [('', []), ('', [])]
        current_block = state._dirblocks[0][1]
        current_dirname = ''
        append_entry = current_block.append
        for count in xrange(state._num_entries):
            dirname = next()
            name = next()
            file_id = next()
            if dirname != current_dirname:
                # new block - different dirname
                current_block = []
                current_dirname = dirname
                state._dirblocks.append((current_dirname, current_block))
                append_entry = current_block.append
            # we know current_dirname == dirname, so re-use it to avoid
            # creating new strings
            entry = ((current_dirname, name, file_id),
                     [(# Current Tree
                         next(),                # minikind
                         next(),                # fingerprint
                         _int(next()),          # size
                         next() == 'y',         # executable
                         next(),                # packed_stat or revision_id
                     ),
                     ( # Parent 1
                         next(),                # minikind
                         next(),                # fingerprint
                         _int(next()),          # size
                         next() == 'y',         # executable
                         next(),                # packed_stat or revision_id
                     ),
                     ])
            trailing = next()
            if trailing != '\n':
                raise ValueError("trailing garbage in dirstate: %r" % trailing)
            # append the entry to the current block
            append_entry(entry)
        state._split_root_dirblock_into_contents()
    else:
        fields_to_entry = state._get_fields_to_entry()
        entries = [fields_to_entry(fields[pos:pos+entry_size])
                   for pos in xrange(cur, field_count, entry_size)]
        state._entries_to_current_state(entries)
    # To convert from format 2  => format 3
    # state._dirblocks = sorted(state._dirblocks,
    #                          key=lambda blk:blk[0].split('/'))
    # To convert from format 3 => format 2
    # state._dirblocks = sorted(state._dirblocks)
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED