~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_pyx.pyx

(vila) Open 2.4.3 for bug fixes (Vincent Ladeuil)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2007-2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
"""Helper functions for DirState.
18
18
 
28
28
 
29
29
from bzrlib import cache_utf8, errors, osutils
30
30
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import pathjoin, splitpath
 
31
from bzrlib.osutils import parent_directories, pathjoin, splitpath
32
32
 
33
33
 
34
34
# This is the Windows equivalent of ENOTDIR
54
54
cdef extern from *:
55
55
    ctypedef unsigned long size_t
56
56
 
57
 
cdef extern from "_dirstate_helpers_c.h":
 
57
cdef extern from "_dirstate_helpers_pyx.h":
58
58
    ctypedef int intptr_t
59
59
 
60
60
 
97
97
    object PyTuple_GetItem_void_object "PyTuple_GET_ITEM" (void* tpl, int index)
98
98
    object PyTuple_GET_ITEM(object tpl, Py_ssize_t index)
99
99
 
 
100
    unsigned long PyInt_AsUnsignedLongMask(object number) except? -1
100
101
 
101
102
    char *PyString_AsString(object p)
102
103
    char *PyString_AsString_obj "PyString_AsString" (PyObject *string)
118
119
    # ??? memrchr is a GNU extension :(
119
120
    # void *memrchr(void *s, int c, size_t len)
120
121
 
121
 
 
122
 
cdef void* _my_memrchr(void *s, int c, size_t n):
 
122
# cimport all of the definitions we will need to access
 
123
from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \
 
124
    StaticTuple_New, StaticTuple_SET_ITEM
 
125
 
 
126
import_static_tuple_c()
 
127
 
 
128
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
123
129
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
124
130
    cdef char *pos
125
131
    cdef char *start
156
162
        return None
157
163
    return <char*>found - <char*>_s
158
164
 
 
165
 
159
166
cdef object safe_string_from_size(char *s, Py_ssize_t size):
160
167
    if size < 0:
161
 
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
162
168
        raise AssertionError(
163
 
            'tried to create a string with an invalid size: %d @0x%x'
164
 
            % (size, <int>s))
 
169
            'tried to create a string with an invalid size: %d'
 
170
            % (size))
165
171
    return PyString_FromStringAndSize(s, size)
166
172
 
167
173
 
168
 
cdef int _is_aligned(void *ptr):
 
174
cdef int _is_aligned(void *ptr): # cannot_raise
169
175
    """Is this pointer aligned to an integer size offset?
170
176
 
171
177
    :return: 1 if this pointer is aligned, 0 otherwise.
173
179
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
174
180
 
175
181
 
176
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
 
182
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
177
183
    cdef unsigned char *cur1
178
184
    cdef unsigned char *cur2
179
185
    cdef unsigned char *end1
237
243
    return 0
238
244
 
239
245
 
240
 
def cmp_by_dirs_c(path1, path2):
 
246
def cmp_by_dirs(path1, path2):
241
247
    """Compare two paths directory by directory.
242
248
 
243
249
    This is equivalent to doing::
266
272
                        PyString_Size(path2))
267
273
 
268
274
 
269
 
def _cmp_path_by_dirblock_c(path1, path2):
 
275
def _cmp_path_by_dirblock(path1, path2):
270
276
    """Compare two paths based on what directory they are in.
271
277
 
272
278
    This generates a sort order, such that all children of a directory are
288
294
    if not PyString_CheckExact(path2):
289
295
        raise TypeError("'path2' must be a plain string, not %s: %r"
290
296
                        % (type(path2), path2))
291
 
    return _cmp_path_by_dirblock(PyString_AsString(path1),
292
 
                                 PyString_Size(path1),
293
 
                                 PyString_AsString(path2),
294
 
                                 PyString_Size(path2))
295
 
 
296
 
 
297
 
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
 
                               char *path2, int path2_len):
 
297
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
 
298
                                        PyString_Size(path1),
 
299
                                        PyString_AsString(path2),
 
300
                                        PyString_Size(path2))
 
301
 
 
302
 
 
303
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
 
304
                                      char *path2, int path2_len): # cannot_raise
299
305
    """Compare two paths by what directory they are in.
300
306
 
301
 
    see ``_cmp_path_by_dirblock_c`` for details.
 
307
    see ``_cmp_path_by_dirblock`` for details.
302
308
    """
303
309
    cdef char *dirname1
304
310
    cdef int dirname1_len
368
374
    return 1
369
375
 
370
376
 
371
 
def _bisect_path_left_c(paths, path):
 
377
def _bisect_path_left(paths, path):
372
378
    """Return the index where to insert path into paths.
373
379
 
374
380
    This uses a path-wise comparison so we get::
413
419
        cur = PyList_GetItem_object_void(paths, _mid)
414
420
        cur_cstr = PyString_AS_STRING_void(cur)
415
421
        cur_size = PyString_GET_SIZE_void(cur)
416
 
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
 
422
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
 
423
                                        path_cstr, path_size) < 0:
417
424
            _lo = _mid + 1
418
425
        else:
419
426
            _hi = _mid
420
427
    return _lo
421
428
 
422
429
 
423
 
def _bisect_path_right_c(paths, path):
 
430
def _bisect_path_right(paths, path):
424
431
    """Return the index where to insert path into paths.
425
432
 
426
433
    This uses a path-wise comparison so we get::
465
472
        cur = PyList_GetItem_object_void(paths, _mid)
466
473
        cur_cstr = PyString_AS_STRING_void(cur)
467
474
        cur_size = PyString_GET_SIZE_void(cur)
468
 
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
 
475
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
 
476
                                        cur_cstr, cur_size) < 0:
469
477
            _hi = _mid
470
478
        else:
471
479
            _lo = _mid + 1
472
480
    return _lo
473
481
 
474
482
 
475
 
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
 
483
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
476
484
    """Return the index where to insert dirname into the dirblocks.
477
485
 
478
486
    The return value idx is such that all directories blocks in dirblock[:idx]
608
616
        :param new_block: This is to let the caller know that it needs to
609
617
            create a new directory block to store the next entry.
610
618
        """
611
 
        cdef object path_name_file_id_key
 
619
        cdef StaticTuple path_name_file_id_key
 
620
        cdef StaticTuple tmp
612
621
        cdef char *entry_size_cstr
613
622
        cdef unsigned long int entry_size
614
623
        cdef char* executable_cstr
648
657
        # Build up the key that will be used.
649
658
        # By using <object>(void *) Pyrex will automatically handle the
650
659
        # Py_INCREF that we need.
651
 
        path_name_file_id_key = (<object>p_current_dirname[0],
652
 
                                 self.get_next_str(),
653
 
                                 self.get_next_str(),
654
 
                                )
 
660
        cur_dirname = <object>p_current_dirname[0]
 
661
        # Use StaticTuple_New to pre-allocate, rather than creating a regular
 
662
        # tuple and passing it to the StaticTuple constructor.
 
663
        # path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
 
664
        #                          self.get_next_str(),
 
665
        #                          self.get_next_str(),
 
666
        #                         )
 
667
        tmp = StaticTuple_New(3)
 
668
        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
 
669
        cur_basename = self.get_next_str()
 
670
        cur_file_id = self.get_next_str()
 
671
        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
 
672
        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
 
673
        path_name_file_id_key = tmp
655
674
 
656
675
        # Parse all of the per-tree information. current has the information in
657
676
        # the same location as parent trees. The only difference is that 'info'
675
694
            executable_cstr = self.get_next(&cur_size)
676
695
            is_executable = (executable_cstr[0] == c'y')
677
696
            info = self.get_next_str()
678
 
            PyList_Append(trees, (
 
697
            # TODO: If we want to use StaticTuple_New here we need to be pretty
 
698
            #       careful. We are relying on a bit of Pyrex
 
699
            #       automatic-conversion from 'int' to PyInt, and that doesn't
 
700
            #       play well with the StaticTuple_SET_ITEM macro.
 
701
            #       Timing doesn't (yet) show a worthwhile improvement in speed
 
702
            #       versus complexity and maintainability.
 
703
            # tmp = StaticTuple_New(5)
 
704
            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
 
705
            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
 
706
            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
 
707
            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
 
708
            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
 
709
            # PyList_Append(trees, tmp)
 
710
            PyList_Append(trees, StaticTuple(
679
711
                minikind,     # minikind
680
712
                fingerprint,  # fingerprint
681
713
                entry_size,   # size
744
776
        self.state._split_root_dirblock_into_contents()
745
777
 
746
778
 
747
 
def _read_dirblocks_c(state):
 
779
def _read_dirblocks(state):
748
780
    """Read in the dirblocks for the given DirState object.
749
781
 
750
782
    This is tightly bound to the DirState internal representation. It should be
766
798
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
767
799
 
768
800
 
769
 
cdef int minikind_from_mode(int mode):
 
801
cdef int minikind_from_mode(int mode): # cannot_raise
770
802
    # in order of frequency:
771
803
    if S_ISREG(mode):
772
804
        return c"f"
790
822
    cdef char result[6*4] # 6 long ints
791
823
    cdef int *aliased
792
824
    aliased = <int *>result
793
 
    aliased[0] = htonl(stat_value.st_size)
794
 
    aliased[1] = htonl(int(stat_value.st_mtime))
795
 
    aliased[2] = htonl(int(stat_value.st_ctime))
796
 
    aliased[3] = htonl(stat_value.st_dev)
797
 
    aliased[4] = htonl(stat_value.st_ino & 0xFFFFFFFF)
798
 
    aliased[5] = htonl(stat_value.st_mode)
 
825
    aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size))
 
826
    # mtime and ctime are often floats; PyInt_AsUnsignedLongMask converts them.
 
827
    aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime))
 
828
    aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime))
 
829
    aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev))
 
830
    aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino))
 
831
    aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode))
799
832
    packed = PyString_FromStringAndSize(result, 6*4)
800
833
    return _encode(packed)[:-1]
801
834
 
835
868
    # _st mode of the compiled stat objects.
836
869
    cdef int minikind, saved_minikind
837
870
    cdef void * details
 
871
    cdef int worth_saving
838
872
    minikind = minikind_from_mode(stat_value.st_mode)
839
873
    if 0 == minikind:
840
874
        return None
869
903
    # If we have gotten this far, that means that we need to actually
870
904
    # process this entry.
871
905
    link_or_sha1 = None
 
906
    worth_saving = 1
872
907
    if minikind == c'f':
873
908
        executable = self._is_executable(stat_value.st_mode,
874
909
                                         saved_executable)
885
920
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
886
921
                           executable, packed_stat)
887
922
        else:
888
 
            entry[1][0] = ('f', '', stat_value.st_size,
889
 
                           executable, DirState.NULLSTAT)
 
923
            # This file is not worth caching the sha1. Either it is too new, or
 
924
            # it is newly added. Regardless, the only things we are changing
 
925
            # are derived from the stat, and so are not worth caching. So we do
 
926
            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
 
927
            # updated values if there is *other* data worth saving.)
 
928
            entry[1][0] = ('f', '', stat_value.st_size, executable,
 
929
                           DirState.NULLSTAT)
 
930
            worth_saving = 0
890
931
    elif minikind == c'd':
891
 
        link_or_sha1 = None
892
932
        entry[1][0] = ('d', '', 0, False, packed_stat)
893
933
        if saved_minikind != c'd':
894
934
            # This changed from something into a directory. Make sure we
898
938
                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
899
939
            self._ensure_block(block_index, entry_index,
900
940
                               pathjoin(entry[0][0], entry[0][1]))
 
941
        else:
 
942
            # Any changes are derived trivially from the stat object, not worth
 
943
            # re-writing a dirstate for just this
 
944
            worth_saving = 0
901
945
    elif minikind == c'l':
 
946
        if saved_minikind == c'l':
 
947
            # If the object hasn't changed kind, it isn't worth saving the
 
948
            # dirstate just for a symlink. The default is 'fast symlinks' which
 
949
            # save the target in the inode entry, rather than separately. So to
 
950
            # stat, we've already read everything off disk.
 
951
            worth_saving = 0
902
952
        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
903
953
        if self._cutoff_time is None:
904
954
            self._sha_cutoff_time()
909
959
        else:
910
960
            entry[1][0] = ('l', '', stat_value.st_size,
911
961
                           False, DirState.NULLSTAT)
912
 
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
962
    if worth_saving:
 
963
        # Note, even though _mark_modified will only set
 
964
        # IN_MEMORY_HASH_MODIFIED, it isn't worth it for stat-only changes.
 
965
        self._mark_modified([entry])
913
966
    return link_or_sha1
914
967
 
915
968
 
916
 
cdef char _minikind_from_string(object string):
 
969
# TODO: Do we want to worry about exceptions here?
 
970
cdef char _minikind_from_string(object string) except? -1:
917
971
    """Convert a python string to a char."""
918
972
    return PyString_AsString(string)[0]
919
973
 
951
1005
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
952
1006
 
953
1007
 
954
 
cdef int _versioned_minikind(char minikind):
 
1008
cdef int _versioned_minikind(char minikind): # cannot_raise
955
1009
    """Return non-zero if minikind is in fltd"""
956
1010
    return (minikind == c'f' or
957
1011
            minikind == c'd' or
961
1015
 
962
1016
cdef class ProcessEntryC:
963
1017
 
 
1018
    cdef int doing_consistency_expansion
964
1019
    cdef object old_dirname_to_file_id # dict
965
1020
    cdef object new_dirname_to_file_id # dict
966
 
    cdef readonly object uninteresting
967
1021
    cdef object last_source_parent
968
1022
    cdef object last_target_parent
969
 
    cdef object include_unchanged
 
1023
    cdef int include_unchanged
 
1024
    cdef int partial
970
1025
    cdef object use_filesystem_for_exec
971
1026
    cdef object utf8_decode
972
1027
    cdef readonly object searched_specific_files
 
1028
    cdef readonly object searched_exact_paths
973
1029
    cdef object search_specific_files
 
1030
    # The parents up to the root of the paths we are searching.
 
1031
    # After all normal paths are returned, these specific items are returned.
 
1032
    cdef object search_specific_file_parents
974
1033
    cdef object state
975
1034
    # Current iteration variables:
976
1035
    cdef object current_root
988
1047
    cdef object current_block_list
989
1048
    cdef object current_dir_info
990
1049
    cdef object current_dir_list
 
1050
    cdef object _pending_consistent_entries # list
991
1051
    cdef int path_index
992
1052
    cdef object root_dir_info
993
1053
    cdef object bisect_left
994
1054
    cdef object pathjoin
995
1055
    cdef object fstat
 
1056
    # A set of the ids we've output when doing partial output.
 
1057
    cdef object seen_ids
996
1058
    cdef object sha_file
997
1059
 
998
1060
    def __init__(self, include_unchanged, use_filesystem_for_exec,
999
1061
        search_specific_files, state, source_index, target_index,
1000
1062
        want_unversioned, tree):
 
1063
        self.doing_consistency_expansion = 0
1001
1064
        self.old_dirname_to_file_id = {}
1002
1065
        self.new_dirname_to_file_id = {}
1003
 
        # Just a sentry, so that _process_entry can say that this
1004
 
        # record is handled, but isn't interesting to process (unchanged)
1005
 
        self.uninteresting = object()
 
1066
        # Are we doing a partial iter_changes?
 
1067
        self.partial = set(['']).__ne__(search_specific_files)
1006
1068
        # Using a list so that we can access the values and change them in
1007
1069
        # nested scope. Each one is [path, file_id, entry]
1008
1070
        self.last_source_parent = [None, None]
1009
1071
        self.last_target_parent = [None, None]
1010
 
        self.include_unchanged = include_unchanged
 
1072
        if include_unchanged is None:
 
1073
            self.include_unchanged = False
 
1074
        else:
 
1075
            self.include_unchanged = int(include_unchanged)
1011
1076
        self.use_filesystem_for_exec = use_filesystem_for_exec
1012
1077
        self.utf8_decode = cache_utf8._utf8_decode
1013
1078
        # for all search_indexs in each path at or under each element of
1014
 
        # search_specific_files, if the detail is relocated: add the id, and add the
1015
 
        # relocated path as one to search if its not searched already. If the
1016
 
        # detail is not relocated, add the id.
 
1079
        # search_specific_files, if the detail is relocated: add the id, and
 
1080
        # add the relocated path as one to search if it's not searched already.
 
1081
        # If the detail is not relocated, add the id.
1017
1082
        self.searched_specific_files = set()
 
1083
        # When we search exact paths without expanding downwards, we record
 
1084
        # that here.
 
1085
        self.searched_exact_paths = set()
1018
1086
        self.search_specific_files = search_specific_files
 
1087
        # The parents up to the root of the paths we are searching.
 
1088
        # After all normal paths are returned, these specific items are returned.
 
1089
        self.search_specific_file_parents = set()
 
1090
        # The ids we've sent out in the delta.
 
1091
        self.seen_ids = set()
1019
1092
        self.state = state
1020
1093
        self.current_root = None
1021
1094
        self.current_root_unicode = None
1037
1110
        self.current_block_pos = -1
1038
1111
        self.current_dir_info = None
1039
1112
        self.current_dir_list = None
 
1113
        self._pending_consistent_entries = []
1040
1114
        self.path_index = 0
1041
1115
        self.root_dir_info = None
1042
1116
        self.bisect_left = bisect.bisect_left
1043
1117
        self.pathjoin = osutils.pathjoin
1044
1118
        self.fstat = os.fstat
1045
1119
        self.sha_file = osutils.sha_file
 
1120
        if target_index != 0:
 
1121
            # A lot of code in here depends on target_index == 0
 
1122
            raise errors.BzrError('unsupported target index')
1046
1123
 
1047
1124
    cdef _process_entry(self, entry, path_info):
1048
1125
        """Compare an entry and real disk to generate delta information.
1049
1126
 
1050
1127
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1051
 
            the path of entry. If None, then the path is considered absent.
1052
 
            (Perhaps we should pass in a concrete entry for this ?)
 
1128
            the path of entry. If None, then the path is considered absent in 
 
1129
            the target (Perhaps we should pass in a concrete entry for this ?)
1053
1130
            Basename is returned as a utf8 string because we expect this
1054
1131
            tuple will be ignored, and don't want to take the time to
1055
1132
            decode.
1056
 
        :return: None if the these don't match
1057
 
                 A tuple of information about the change, or
1058
 
                 the object 'uninteresting' if these match, but are
1059
 
                 basically identical.
 
1133
        :return: (iter_changes_result, changed). If the entry has not been
 
1134
            handled then changed is None. Otherwise it is False if no content
 
1135
            or metadata changes have occurred, and True if any content or
 
1136
            metadata change has occurred. If self.include_unchanged is True then
 
1137
            if changed is not None, iter_changes_result will always be a result
 
1138
            tuple. Otherwise, iter_changes_result is None unless changed is
 
1139
            True.
1060
1140
        """
1061
1141
        cdef char target_minikind
1062
1142
        cdef char source_minikind
1098
1178
            else:
1099
1179
                # add the source to the search path to find any children it
1100
1180
                # has.  TODO ? : only add if it is a container ?
1101
 
                if not osutils.is_inside_any(self.searched_specific_files,
1102
 
                                             source_details[1]):
 
1181
                if (not self.doing_consistency_expansion and 
 
1182
                    not osutils.is_inside_any(self.searched_specific_files,
 
1183
                                             source_details[1])):
1103
1184
                    self.search_specific_files.add(source_details[1])
 
1185
                    # expanding from a user requested path, parent expansion
 
1186
                    # for delta consistency happens later.
1104
1187
                # generate the old path; this is needed for stating later
1105
1188
                # as well.
1106
1189
                old_path = source_details[1]
1140
1223
                    if source_minikind != c'f':
1141
1224
                        content_change = 1
1142
1225
                    else:
1143
 
                        # If the size is the same, check the sha:
1144
 
                        if target_details[2] == source_details[2]:
1145
 
                            if link_or_sha1 is None:
1146
 
                                # Stat cache miss:
1147
 
                                statvalue, link_or_sha1 = \
1148
 
                                    self.state._sha1_provider.stat_and_sha1(
1149
 
                                    path_info[4])
1150
 
                                self.state._observed_sha1(entry, link_or_sha1,
1151
 
                                    statvalue)
1152
 
                            content_change = (link_or_sha1 != source_details[1])
1153
 
                        else:
1154
 
                            # Size changed, so must be different
1155
 
                            content_change = 1
 
1226
                        # Check the sha. We can't just rely on the size as
 
1227
                        # content filtering may mean different sizes actually
 
1228
                        # map to the same content
 
1229
                        if link_or_sha1 is None:
 
1230
                            # Stat cache miss:
 
1231
                            statvalue, link_or_sha1 = \
 
1232
                                self.state._sha1_provider.stat_and_sha1(
 
1233
                                path_info[4])
 
1234
                            self.state._observed_sha1(entry, link_or_sha1,
 
1235
                                statvalue)
 
1236
                        content_change = (link_or_sha1 != source_details[1])
1156
1237
                    # Target details is updated at update_entry time
1157
1238
                    if self.use_filesystem_for_exec:
1158
1239
                        # We don't need S_ISREG here, because we are sure
1173
1254
                        content_change = 0
1174
1255
                    target_exec = False
1175
1256
                else:
1176
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
1257
                    if path is None:
 
1258
                        path = self.pathjoin(old_dirname, old_basename)
 
1259
                    raise errors.BadFileKindError(path, path_info[2])
1177
1260
            if source_minikind == c'd':
1178
1261
                if path is None:
1179
1262
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1181
1264
                    file_id = entry[0][2]
1182
1265
                self.old_dirname_to_file_id[old_path] = file_id
1183
1266
            # parent id is the entry for the path in the target tree
1184
 
            if old_dirname == self.last_source_parent[0]:
 
1267
            if old_basename and old_dirname == self.last_source_parent[0]:
 
1268
                # use a cached hit for non-root source entries.
1185
1269
                source_parent_id = self.last_source_parent[1]
1186
1270
            else:
1187
1271
                try:
1188
1272
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
1189
 
                except KeyError:
 
1273
                except KeyError, _:
1190
1274
                    source_parent_entry = self.state._get_entry(self.source_index,
1191
1275
                                                           path_utf8=old_dirname)
1192
1276
                    source_parent_id = source_parent_entry[0][2]
1197
1281
                    self.last_source_parent[0] = old_dirname
1198
1282
                    self.last_source_parent[1] = source_parent_id
1199
1283
            new_dirname = entry[0][0]
1200
 
            if new_dirname == self.last_target_parent[0]:
 
1284
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
1285
                # use a cached hit for non-root target entries.
1201
1286
                target_parent_id = self.last_target_parent[1]
1202
1287
            else:
1203
1288
                try:
1204
1289
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
1205
 
                except KeyError:
 
1290
                except KeyError, _:
1206
1291
                    # TODO: We don't always need to do the lookup, because the
1207
1292
                    #       parent entry will be the same as the source entry.
1208
1293
                    target_parent_entry = self.state._get_entry(self.target_index,
1220
1305
                    self.last_target_parent[1] = target_parent_id
1221
1306
 
1222
1307
            source_exec = source_details[3]
1223
 
            if (self.include_unchanged
1224
 
                or content_change
 
1308
            changed = (content_change
1225
1309
                or source_parent_id != target_parent_id
1226
1310
                or old_basename != entry[0][1]
1227
1311
                or source_exec != target_exec
1228
 
                ):
 
1312
                )
 
1313
            if not changed and not self.include_unchanged:
 
1314
                return None, False
 
1315
            else:
1229
1316
                if old_path is None:
1230
1317
                    path = self.pathjoin(old_dirname, old_basename)
1231
1318
                    old_path = path
1245
1332
                       (source_parent_id, target_parent_id),
1246
1333
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1247
1334
                       (source_kind, target_kind),
1248
 
                       (source_exec, target_exec))
1249
 
            else:
1250
 
                return self.uninteresting
 
1335
                       (source_exec, target_exec)), changed
1251
1336
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1252
1337
            # looks like a new file
1253
1338
            path = self.pathjoin(entry[0][0], entry[0][1])
1280
1365
                       (None, parent_id),
1281
1366
                       (None, self.utf8_decode(entry[0][1])[0]),
1282
1367
                       (None, path_info[2]),
1283
 
                       (None, target_exec))
 
1368
                       (None, target_exec)), True
1284
1369
            else:
1285
1370
                # It's a missing file, report it as such.
1286
1371
                return (entry[0][2],
1290
1375
                       (None, parent_id),
1291
1376
                       (None, self.utf8_decode(entry[0][1])[0]),
1292
1377
                       (None, None),
1293
 
                       (None, False))
 
1378
                       (None, False)), True
1294
1379
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1295
1380
            # unversioned, possibly, or possibly not deleted: we don't care.
1296
1381
            # if its still on disk, *and* theres no other entry at this
1308
1393
                   (parent_id, None),
1309
1394
                   (self.utf8_decode(entry[0][1])[0], None),
1310
1395
                   (_minikind_to_kind(source_minikind), None),
1311
 
                   (source_details[3], None))
 
1396
                   (source_details[3], None)), True
1312
1397
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1313
1398
            # a rename; could be a true rename, or a rename inherited from
1314
1399
            # a renamed parent. TODO: handle this efficiently. Its not
1315
1400
            # common case to rename dirs though, so a correct but slow
1316
1401
            # implementation will do.
1317
 
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
1402
            if (not self.doing_consistency_expansion and 
 
1403
                not osutils.is_inside_any(self.searched_specific_files,
 
1404
                    target_details[1])):
1318
1405
                self.search_specific_files.add(target_details[1])
 
1406
                # We don't expand the specific files parents list here as
 
1407
                # the path is absent in target and won't create a delta with
 
1408
                # missing parent.
1319
1409
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1320
1410
              (target_minikind == c'r' or target_minikind == c'a')):
1321
1411
            # neither of the selected trees contain this path,
1327
1417
                "source_minikind=%r, target_minikind=%r"
1328
1418
                % (source_minikind, target_minikind))
1329
1419
            ## import pdb;pdb.set_trace()
1330
 
        return None
 
1420
        return None, None
1331
1421
 
1332
1422
    def __iter__(self):
        # Iterator protocol: this object is its own iterator, with the
        # individual results being produced by __next__.
        return self
1335
1425
    def iter_changes(self):
        # Returns this object itself rather than a separate iterator; callers
        # iterate the returned object to obtain the results.
        return self
1337
1427
 
1338
 
    cdef void _update_current_block(self):
 
1428
    cdef int _gather_result_for_consistency(self, result) except -1:
 
1429
        """Check a result we will yield to make sure we are consistent later.
 
1430
        
 
1431
        This gathers result's parents into a set to output later.
 
1432
 
 
1433
        :param result: A result tuple.
 
1434
        """
 
1435
        if not self.partial or not result[0]:
 
1436
            return 0
 
1437
        self.seen_ids.add(result[0])
 
1438
        new_path = result[1][1]
 
1439
        if new_path:
 
1440
            # Not the root and not a delete: queue up the parents of the path.
 
1441
            self.search_specific_file_parents.update(
 
1442
                osutils.parent_directories(new_path.encode('utf8')))
 
1443
            # Add the root directory which parent_directories does not
 
1444
            # provide.
 
1445
            self.search_specific_file_parents.add('')
 
1446
        return 0
 
1447
 
 
1448
    cdef int _update_current_block(self) except -1:
1339
1449
        if (self.block_index < len(self.state._dirblocks) and
1340
1450
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1341
1451
            self.current_block = self.state._dirblocks[self.block_index]
1344
1454
        else:
1345
1455
            self.current_block = None
1346
1456
            self.current_block_list = None
 
1457
        return 0
1347
1458
 
1348
1459
    def __next__(self):
1349
1460
        # Simple thunk to allow tail recursion without pyrex confusion
1401
1512
        cdef char * current_dirname_c, * current_blockname_c
1402
1513
        cdef int advance_entry, advance_path
1403
1514
        cdef int path_handled
1404
 
        uninteresting = self.uninteresting
1405
1515
        searched_specific_files = self.searched_specific_files
1406
1516
        # Are we walking a root?
1407
1517
        while self.root_entries_pos < self.root_entries_len:
1408
1518
            entry = self.root_entries[self.root_entries_pos]
1409
1519
            self.root_entries_pos = self.root_entries_pos + 1
1410
 
            result = self._process_entry(entry, self.root_dir_info)
1411
 
            if result is not None and result is not self.uninteresting:
1412
 
                return result
 
1520
            result, changed = self._process_entry(entry, self.root_dir_info)
 
1521
            if changed is not None:
 
1522
                if changed:
 
1523
                    self._gather_result_for_consistency(result)
 
1524
                if changed or self.include_unchanged:
 
1525
                    return result
1413
1526
        # Have we finished the prior root, or never started one ?
1414
1527
        if self.current_root is None:
1415
1528
            # TODO: the pending list should be lexically sorted?  the
1416
1529
            # interface doesn't require it.
1417
1530
            try:
1418
1531
                self.current_root = self.search_specific_files.pop()
1419
 
            except KeyError:
 
1532
            except KeyError, _:
1420
1533
                raise StopIteration()
1421
 
            self.current_root_unicode = self.current_root.decode('utf8')
1422
1534
            self.searched_specific_files.add(self.current_root)
1423
1535
            # process the entries for this containing directory: the rest will be
1424
1536
            # found by their parents recursively.
1425
1537
            self.root_entries = self.state._entries_for_path(self.current_root)
1426
1538
            self.root_entries_len = len(self.root_entries)
 
1539
            self.current_root_unicode = self.current_root.decode('utf8')
1427
1540
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1428
1541
            try:
1429
1542
                root_stat = os.lstat(self.root_abspath)
1457
1570
            while self.root_entries_pos < self.root_entries_len:
1458
1571
                entry = self.root_entries[self.root_entries_pos]
1459
1572
                self.root_entries_pos = self.root_entries_pos + 1
1460
 
                result = self._process_entry(entry, self.root_dir_info)
1461
 
                if result is not None:
 
1573
                result, changed = self._process_entry(entry, self.root_dir_info)
 
1574
                if changed is not None:
1462
1575
                    path_handled = -1
1463
 
                    if result is not self.uninteresting:
 
1576
                    if changed:
 
1577
                        self._gather_result_for_consistency(result)
 
1578
                    if changed or self.include_unchanged:
1464
1579
                        return result
1465
1580
            # handle unversioned specified paths:
1466
1581
            if self.want_unversioned and not path_handled and self.root_dir_info:
1478
1593
                      )
1479
1594
            # If we reach here, the outer flow continues, which enters into the
1480
1595
            # per-root setup logic.
1481
 
        if self.current_dir_info is None and self.current_block is None:
 
1596
        if (self.current_dir_info is None and self.current_block is None and not
 
1597
            self.doing_consistency_expansion):
1482
1598
            # setup iteration of this root:
1483
1599
            self.current_dir_list = None
1484
1600
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1502
1618
                        #            and e.winerror == ERROR_DIRECTORY
1503
1619
                        try:
1504
1620
                            e_winerror = e.winerror
1505
 
                        except AttributeError:
 
1621
                        except AttributeError, _:
1506
1622
                            e_winerror = None
1507
1623
                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
1508
1624
                        if (e.errno in win_errors or e_winerror in win_errors):
1591
1707
                    try:
1592
1708
                        self.current_dir_info = self.dir_iterator.next()
1593
1709
                        self.current_dir_list = self.current_dir_info[1]
1594
 
                    except StopIteration:
 
1710
                    except StopIteration, _:
1595
1711
                        self.current_dir_info = None
1596
1712
                else: #(dircmp > 0)
1597
1713
                    # We have a dirblock entry for this location, but there
1606
1722
                        self.current_block_pos = self.current_block_pos + 1
1607
1723
                        # entry referring to file not present on disk.
1608
1724
                        # advance the entry only, after processing.
1609
 
                        result = self._process_entry(current_entry, None)
1610
 
                        if result is not None:
1611
 
                            if result is not self.uninteresting:
 
1725
                        result, changed = self._process_entry(current_entry, None)
 
1726
                        if changed is not None:
 
1727
                            if changed:
 
1728
                                self._gather_result_for_consistency(result)
 
1729
                            if changed or self.include_unchanged:
1612
1730
                                return result
1613
1731
                    self.block_index = self.block_index + 1
1614
1732
                    self._update_current_block()
1620
1738
            # More supplied paths to process
1621
1739
            self.current_root = None
1622
1740
            return self._iter_next()
 
1741
        # Start expanding more conservatively, adding paths the user may not
 
1742
        # have intended but required for consistent deltas.
 
1743
        self.doing_consistency_expansion = 1
 
1744
        if not self._pending_consistent_entries:
 
1745
            self._pending_consistent_entries = self._next_consistent_entries()
 
1746
        while self._pending_consistent_entries:
 
1747
            result, changed = self._pending_consistent_entries.pop()
 
1748
            if changed is not None:
 
1749
                return result
1623
1750
        raise StopIteration()
1624
1751
 
1625
1752
    cdef object _maybe_tree_ref(self, current_path_info):
1668
1795
                advance_entry = -1
1669
1796
                advance_path = -1
1670
1797
                result = None
 
1798
                changed = None
1671
1799
                path_handled = 0
1672
1800
                if current_entry is None:
1673
1801
                    # unversioned -  the check for path_handled when the path
1675
1803
                    pass
1676
1804
                elif current_path_info is None:
1677
1805
                    # no path is fine: the per entry code will handle it.
1678
 
                    result = self._process_entry(current_entry, current_path_info)
1679
 
                    if result is not None:
1680
 
                        if result is self.uninteresting:
1681
 
                            result = None
 
1806
                    result, changed = self._process_entry(current_entry,
 
1807
                        current_path_info)
1682
1808
                else:
1683
1809
                    minikind = _minikind_from_string(
1684
1810
                        current_entry[1][self.target_index][0])
1699
1825
                        else:
1700
1826
                            # entry referring to file not present on disk.
1701
1827
                            # advance the entry only, after processing.
1702
 
                            result = self._process_entry(current_entry, None)
1703
 
                            if result is not None:
1704
 
                                if result is self.uninteresting:
1705
 
                                    result = None
 
1828
                            result, changed = self._process_entry(current_entry,
 
1829
                                None)
1706
1830
                            advance_path = 0
1707
1831
                    else:
1708
1832
                        # paths are the same,and the dirstate entry is not
1709
1833
                        # absent or renamed.
1710
 
                        result = self._process_entry(current_entry, current_path_info)
1711
 
                        if result is not None:
 
1834
                        result, changed = self._process_entry(current_entry,
 
1835
                            current_path_info)
 
1836
                        if changed is not None:
1712
1837
                            path_handled = -1
1713
 
                            if result is self.uninteresting:
1714
 
                                result = None
 
1838
                            if not changed and not self.include_unchanged:
 
1839
                                changed = None
1715
1840
                # >- loop control starts here:
1716
1841
                # >- entry
1717
1842
                if advance_entry and current_entry is not None:
1730
1855
                                and stat.S_IEXEC & current_path_info[3].st_mode)
1731
1856
                            try:
1732
1857
                                relpath_unicode = self.utf8_decode(current_path_info[0])[0]
1733
 
                            except UnicodeDecodeError:
 
1858
                            except UnicodeDecodeError, _:
1734
1859
                                raise errors.BadFilenameEncoding(
1735
1860
                                    current_path_info[0], osutils._fs_enc)
1736
 
                            if result is not None:
 
1861
                            if changed is not None:
1737
1862
                                raise AssertionError(
1738
1863
                                    "result is not None: %r" % result)
1739
1864
                            result = (None,
1744
1869
                                (None, self.utf8_decode(current_path_info[1])[0]),
1745
1870
                                (None, current_path_info[2]),
1746
1871
                                (None, new_executable))
 
1872
                            changed = True
1747
1873
                        # dont descend into this unversioned path if it is
1748
1874
                        # a dir
1749
1875
                        if current_path_info[2] in ('directory'):
1762
1888
                                current_path_info)
1763
1889
                    else:
1764
1890
                        current_path_info = None
1765
 
                if result is not None:
 
1891
                if changed is not None:
1766
1892
                    # Found a result on this pass, yield it
1767
 
                    return result
 
1893
                    if changed:
 
1894
                        self._gather_result_for_consistency(result)
 
1895
                    if changed or self.include_unchanged:
 
1896
                        return result
1768
1897
            if self.current_block is not None:
1769
1898
                self.block_index = self.block_index + 1
1770
1899
                self._update_current_block()
1774
1903
                try:
1775
1904
                    self.current_dir_info = self.dir_iterator.next()
1776
1905
                    self.current_dir_list = self.current_dir_info[1]
1777
 
                except StopIteration:
 
1906
                except StopIteration, _:
1778
1907
                    self.current_dir_info = None
 
1908
 
 
1909
    cdef object _next_consistent_entries(self):
 
1910
        """Grabs the next specific file parent case to consider.
 
1911
        
 
1912
        :return: A list of the results, each of which is as for _process_entry.
 
1913
        """
 
1914
        results = []
 
1915
        while self.search_specific_file_parents:
 
1916
            # Process the parent directories for the paths we were iterating.
 
1917
            # Even in extremely large trees this should be modest, so currently
 
1918
            # no attempt is made to optimise.
 
1919
            path_utf8 = self.search_specific_file_parents.pop()
 
1920
            if path_utf8 in self.searched_exact_paths:
 
1921
                # We've examined this path.
 
1922
                continue
 
1923
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
1924
                # We've examined this path.
 
1925
                continue
 
1926
            path_entries = self.state._entries_for_path(path_utf8)
 
1927
            # We need either one or two entries. If the path in
 
1928
            # self.target_index has moved (so the entry in source_index is in
 
1929
            # 'ar') then we need to also look for the entry for this path in
 
1930
            # self.source_index, to output the appropriate delete-or-rename.
 
1931
            selected_entries = []
 
1932
            found_item = False
 
1933
            for candidate_entry in path_entries:
 
1934
                # Find entries present in target at this path:
 
1935
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
1936
                    found_item = True
 
1937
                    selected_entries.append(candidate_entry)
 
1938
                # Find entries present in source at this path:
 
1939
                elif (self.source_index is not None and
 
1940
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
1941
                    found_item = True
 
1942
                    if candidate_entry[1][self.target_index][0] == 'a':
 
1943
                        # Deleted, emit it here.
 
1944
                        selected_entries.append(candidate_entry)
 
1945
                    else:
 
1946
                        # renamed, emit it when we process the directory it
 
1947
                        # ended up at.
 
1948
                        self.search_specific_file_parents.add(
 
1949
                            candidate_entry[1][self.target_index][1])
 
1950
            if not found_item:
 
1951
                raise AssertionError(
 
1952
                    "Missing entry for specific path parent %r, %r" % (
 
1953
                    path_utf8, path_entries))
 
1954
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
1955
            for entry in selected_entries:
 
1956
                if entry[0][2] in self.seen_ids:
 
1957
                    continue
 
1958
                result, changed = self._process_entry(entry, path_info)
 
1959
                if changed is None:
 
1960
                    raise AssertionError(
 
1961
                        "Got entry<->path mismatch for specific path "
 
1962
                        "%r entry %r path_info %r " % (
 
1963
                        path_utf8, entry, path_info))
 
1964
                # Only include changes - we're outside the users requested
 
1965
                # expansion.
 
1966
                if changed:
 
1967
                    self._gather_result_for_consistency(result)
 
1968
                    if (result[6][0] == 'directory' and
 
1969
                        result[6][1] != 'directory'):
 
1970
                        # This stopped being a directory, the old children have
 
1971
                        # to be included.
 
1972
                        if entry[1][self.source_index][0] == 'r':
 
1973
                            # renamed, take the source path
 
1974
                            entry_path_utf8 = entry[1][self.source_index][1]
 
1975
                        else:
 
1976
                            entry_path_utf8 = path_utf8
 
1977
                        initial_key = (entry_path_utf8, '', '')
 
1978
                        block_index, _ = self.state._find_block_index_from_key(
 
1979
                            initial_key)
 
1980
                        if block_index == 0:
 
1981
                            # The children of the root are in block index 1.
 
1982
                            block_index = block_index + 1
 
1983
                        current_block = None
 
1984
                        if block_index < len(self.state._dirblocks):
 
1985
                            current_block = self.state._dirblocks[block_index]
 
1986
                            if not osutils.is_inside(
 
1987
                                entry_path_utf8, current_block[0]):
 
1988
                                # No entries for this directory at all.
 
1989
                                current_block = None
 
1990
                        if current_block is not None:
 
1991
                            for entry in current_block[1]:
 
1992
                                if entry[1][self.source_index][0] in 'ar':
 
1993
                                    # Not in the source tree, so doesn't have to be
 
1994
                                    # included.
 
1995
                                    continue
 
1996
                                # Path of the entry itself.
 
1997
                                self.search_specific_file_parents.add(
 
1998
                                    self.pathjoin(*entry[0][:2]))
 
1999
                if changed or self.include_unchanged:
 
2000
                    results.append((result, changed))
 
2001
            self.searched_exact_paths.add(path_utf8)
 
2002
        return results
 
2003
 
 
2004
    cdef object _path_info(self, utf8_path, unicode_path):
 
2005
        """Generate path_info for unicode_path.
 
2006
 
 
2007
        :return: None if unicode_path does not exist, or a path_info tuple.
 
2008
        """
 
2009
        abspath = self.tree.abspath(unicode_path)
 
2010
        try:
 
2011
            stat = os.lstat(abspath)
 
2012
        except OSError, e:
 
2013
            if e.errno == errno.ENOENT:
 
2014
                # the path does not exist.
 
2015
                return None
 
2016
            else:
 
2017
                raise
 
2018
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
2019
        dir_info = (utf8_path, utf8_basename,
 
2020
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
2021
            abspath)
 
2022
        if dir_info[2] == 'directory':
 
2023
            if self.tree._directory_is_tree_reference(
 
2024
                unicode_path):
 
2025
                self.root_dir_info = self.root_dir_info[:2] + \
 
2026
                    ('tree-reference',) + self.root_dir_info[3:]
 
2027
        return dir_info