~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_c.pyx

  • Committer: Aaron Bentley
  • Date: 2009-06-19 21:16:31 UTC
  • mto: This revision was merged to the branch mainline in revision 4481.
  • Revision ID: aaron@aaronbentley.com-20090619211631-4fnkv2uui98xj7ux
Provide control over switch and shelver messaging.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007-2010 Canonical Ltd
 
1
# Copyright (C) 2007, 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
28
28
 
29
29
from bzrlib import cache_utf8, errors, osutils
30
30
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import parent_directories, pathjoin, splitpath
 
31
from bzrlib.osutils import pathjoin, splitpath
32
32
 
33
33
 
34
34
# This is the Windows equivalent of ENOTDIR
54
54
cdef extern from *:
55
55
    ctypedef unsigned long size_t
56
56
 
57
 
cdef extern from "_dirstate_helpers_pyx.h":
 
57
cdef extern from "_dirstate_helpers_c.h":
58
58
    ctypedef int intptr_t
59
59
 
60
60
 
118
118
    # ??? memrchr is a GNU extension :(
119
119
    # void *memrchr(void *s, int c, size_t len)
120
120
 
121
 
# cimport all of the definitions we will need to access
122
 
from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \
123
 
    StaticTuple_New, StaticTuple_SET_ITEM
124
 
 
125
 
import_static_tuple_c()
126
 
 
127
 
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
 
121
 
 
122
cdef void* _my_memrchr(void *s, int c, size_t n):
128
123
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
129
124
    cdef char *pos
130
125
    cdef char *start
161
156
        return None
162
157
    return <char*>found - <char*>_s
163
158
 
164
 
 
165
159
cdef object safe_string_from_size(char *s, Py_ssize_t size):
166
160
    if size < 0:
 
161
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
167
162
        raise AssertionError(
168
 
            'tried to create a string with an invalid size: %d'
169
 
            % (size))
 
163
            'tried to create a string with an invalid size: %d @0x%x'
 
164
            % (size, <int>s))
170
165
    return PyString_FromStringAndSize(s, size)
171
166
 
172
167
 
173
 
cdef int _is_aligned(void *ptr): # cannot_raise
 
168
cdef int _is_aligned(void *ptr):
174
169
    """Is this pointer aligned to an integer size offset?
175
170
 
176
171
    :return: 1 if this pointer is aligned, 0 otherwise.
178
173
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
179
174
 
180
175
 
181
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
 
176
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
182
177
    cdef unsigned char *cur1
183
178
    cdef unsigned char *cur2
184
179
    cdef unsigned char *end1
242
237
    return 0
243
238
 
244
239
 
245
 
def cmp_by_dirs(path1, path2):
 
240
def cmp_by_dirs_c(path1, path2):
246
241
    """Compare two paths directory by directory.
247
242
 
248
243
    This is equivalent to doing::
271
266
                        PyString_Size(path2))
272
267
 
273
268
 
274
 
def _cmp_path_by_dirblock(path1, path2):
 
269
def _cmp_path_by_dirblock_c(path1, path2):
275
270
    """Compare two paths based on what directory they are in.
276
271
 
277
272
    This generates a sort order, such that all children of a directory are
293
288
    if not PyString_CheckExact(path2):
294
289
        raise TypeError("'path2' must be a plain string, not %s: %r"
295
290
                        % (type(path2), path2))
296
 
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
297
 
                                        PyString_Size(path1),
298
 
                                        PyString_AsString(path2),
299
 
                                        PyString_Size(path2))
300
 
 
301
 
 
302
 
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
303
 
                                      char *path2, int path2_len): # cannot_raise
 
291
    return _cmp_path_by_dirblock(PyString_AsString(path1),
 
292
                                 PyString_Size(path1),
 
293
                                 PyString_AsString(path2),
 
294
                                 PyString_Size(path2))
 
295
 
 
296
 
 
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
 
298
                               char *path2, int path2_len):
304
299
    """Compare two paths by what directory they are in.
305
300
 
306
 
    see ``_cmp_path_by_dirblock`` for details.
 
301
    see ``_cmp_path_by_dirblock_c`` for details.
307
302
    """
308
303
    cdef char *dirname1
309
304
    cdef int dirname1_len
373
368
    return 1
374
369
 
375
370
 
376
 
def _bisect_path_left(paths, path):
 
371
def _bisect_path_left_c(paths, path):
377
372
    """Return the index where to insert path into paths.
378
373
 
379
374
    This uses a path-wise comparison so we get::
418
413
        cur = PyList_GetItem_object_void(paths, _mid)
419
414
        cur_cstr = PyString_AS_STRING_void(cur)
420
415
        cur_size = PyString_GET_SIZE_void(cur)
421
 
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
422
 
                                        path_cstr, path_size) < 0:
 
416
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
423
417
            _lo = _mid + 1
424
418
        else:
425
419
            _hi = _mid
426
420
    return _lo
427
421
 
428
422
 
429
 
def _bisect_path_right(paths, path):
 
423
def _bisect_path_right_c(paths, path):
430
424
    """Return the index where to insert path into paths.
431
425
 
432
426
    This uses a path-wise comparison so we get::
471
465
        cur = PyList_GetItem_object_void(paths, _mid)
472
466
        cur_cstr = PyString_AS_STRING_void(cur)
473
467
        cur_size = PyString_GET_SIZE_void(cur)
474
 
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
475
 
                                        cur_cstr, cur_size) < 0:
 
468
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
476
469
            _hi = _mid
477
470
        else:
478
471
            _lo = _mid + 1
479
472
    return _lo
480
473
 
481
474
 
482
 
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
 
475
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
483
476
    """Return the index where to insert dirname into the dirblocks.
484
477
 
485
478
    The return value idx is such that all directories blocks in dirblock[:idx]
615
608
        :param new_block: This is to let the caller know that it needs to
616
609
            create a new directory block to store the next entry.
617
610
        """
618
 
        cdef StaticTuple path_name_file_id_key
619
 
        cdef StaticTuple tmp
 
611
        cdef object path_name_file_id_key
620
612
        cdef char *entry_size_cstr
621
613
        cdef unsigned long int entry_size
622
614
        cdef char* executable_cstr
656
648
        # Build up the key that will be used.
657
649
        # By using <object>(void *) Pyrex will automatically handle the
658
650
        # Py_INCREF that we need.
659
 
        cur_dirname = <object>p_current_dirname[0]
660
 
        # Use StaticTuple_New to pre-allocate, rather than creating a regular
661
 
        # tuple and passing it to the StaticTuple constructor.
662
 
        # path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
663
 
        #                          self.get_next_str(),
664
 
        #                          self.get_next_str(),
665
 
        #                         )
666
 
        tmp = StaticTuple_New(3)
667
 
        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
668
 
        cur_basename = self.get_next_str()
669
 
        cur_file_id = self.get_next_str()
670
 
        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
671
 
        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
672
 
        path_name_file_id_key = tmp
 
651
        path_name_file_id_key = (<object>p_current_dirname[0],
 
652
                                 self.get_next_str(),
 
653
                                 self.get_next_str(),
 
654
                                )
673
655
 
674
656
        # Parse all of the per-tree information. current has the information in
675
657
        # the same location as parent trees. The only difference is that 'info'
693
675
            executable_cstr = self.get_next(&cur_size)
694
676
            is_executable = (executable_cstr[0] == c'y')
695
677
            info = self.get_next_str()
696
 
            # TODO: If we want to use StaticTuple_New here we need to be pretty
697
 
            #       careful. We are relying on a bit of Pyrex
698
 
            #       automatic-conversion from 'int' to PyInt, and that doesn't
699
 
            #       play well with the StaticTuple_SET_ITEM macro.
700
 
            #       Timing doesn't (yet) show a worthwhile improvement in speed
701
 
            #       versus complexity and maintainability.
702
 
            # tmp = StaticTuple_New(5)
703
 
            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
704
 
            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
705
 
            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
706
 
            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
707
 
            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
708
 
            # PyList_Append(trees, tmp)
709
 
            PyList_Append(trees, StaticTuple(
 
678
            PyList_Append(trees, (
710
679
                minikind,     # minikind
711
680
                fingerprint,  # fingerprint
712
681
                entry_size,   # size
775
744
        self.state._split_root_dirblock_into_contents()
776
745
 
777
746
 
778
 
def _read_dirblocks(state):
 
747
def _read_dirblocks_c(state):
779
748
    """Read in the dirblocks for the given DirState object.
780
749
 
781
750
    This is tightly bound to the DirState internal representation. It should be
797
766
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
798
767
 
799
768
 
800
 
cdef int minikind_from_mode(int mode): # cannot_raise
 
769
cdef int minikind_from_mode(int mode):
801
770
    # in order of frequency:
802
771
    if S_ISREG(mode):
803
772
        return c"f"
866
835
    # _st mode of the compiled stat objects.
867
836
    cdef int minikind, saved_minikind
868
837
    cdef void * details
869
 
    cdef int worth_saving
870
838
    minikind = minikind_from_mode(stat_value.st_mode)
871
839
    if 0 == minikind:
872
840
        return None
901
869
    # If we have gotten this far, that means that we need to actually
902
870
    # process this entry.
903
871
    link_or_sha1 = None
904
 
    worth_saving = 1
905
872
    if minikind == c'f':
906
873
        executable = self._is_executable(stat_value.st_mode,
907
874
                                         saved_executable)
918
885
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
919
886
                           executable, packed_stat)
920
887
        else:
921
 
            # This file is not worth caching the sha1. Either it is too new, or
922
 
            # it is newly added. Regardless, the only things we are changing
923
 
            # are derived from the stat, and so are not worth caching. So we do
924
 
            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
925
 
            # updated values if there is *other* data worth saving.)
926
 
            entry[1][0] = ('f', '', stat_value.st_size, executable,
927
 
                           DirState.NULLSTAT)
928
 
            worth_saving = 0
 
888
            entry[1][0] = ('f', '', stat_value.st_size,
 
889
                           executable, DirState.NULLSTAT)
929
890
    elif minikind == c'd':
 
891
        link_or_sha1 = None
930
892
        entry[1][0] = ('d', '', 0, False, packed_stat)
931
893
        if saved_minikind != c'd':
932
894
            # This changed from something into a directory. Make sure we
936
898
                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
937
899
            self._ensure_block(block_index, entry_index,
938
900
                               pathjoin(entry[0][0], entry[0][1]))
939
 
        else:
940
 
            # Any changes are derived trivially from the stat object, not worth
941
 
            # re-writing a dirstate for just this
942
 
            worth_saving = 0
943
901
    elif minikind == c'l':
944
 
        if saved_minikind == c'l':
945
 
            # If the object hasn't changed kind, it isn't worth saving the
946
 
            # dirstate just for a symlink. The default is 'fast symlinks' which
947
 
            # save the target in the inode entry, rather than separately. So to
948
 
            # stat, we've already read everything off disk.
949
 
            worth_saving = 0
950
902
        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
951
903
        if self._cutoff_time is None:
952
904
            self._sha_cutoff_time()
957
909
        else:
958
910
            entry[1][0] = ('l', '', stat_value.st_size,
959
911
                           False, DirState.NULLSTAT)
960
 
    if worth_saving:
961
 
        # Note, even though _mark_modified will only set
962
 
        # IN_MEMORY_HASH_MODIFIED, it still isn't worth it for stat-only changes.
963
 
        self._mark_modified([entry])
 
912
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
964
913
    return link_or_sha1
965
914
 
966
915
 
967
 
# TODO: Do we want to worry about exceptions here?
968
 
cdef char _minikind_from_string(object string) except? -1:
 
916
cdef char _minikind_from_string(object string):
969
917
    """Convert a python string to a char."""
970
918
    return PyString_AsString(string)[0]
971
919
 
1003
951
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
1004
952
 
1005
953
 
1006
 
cdef int _versioned_minikind(char minikind): # cannot_raise
 
954
cdef int _versioned_minikind(char minikind):
1007
955
    """Return non-zero if minikind is in fltd"""
1008
956
    return (minikind == c'f' or
1009
957
            minikind == c'd' or
1013
961
 
1014
962
cdef class ProcessEntryC:
1015
963
 
1016
 
    cdef int doing_consistency_expansion
1017
964
    cdef object old_dirname_to_file_id # dict
1018
965
    cdef object new_dirname_to_file_id # dict
 
966
    cdef readonly object uninteresting
1019
967
    cdef object last_source_parent
1020
968
    cdef object last_target_parent
1021
 
    cdef int include_unchanged
1022
 
    cdef int partial
 
969
    cdef object include_unchanged
1023
970
    cdef object use_filesystem_for_exec
1024
971
    cdef object utf8_decode
1025
972
    cdef readonly object searched_specific_files
1026
 
    cdef readonly object searched_exact_paths
1027
973
    cdef object search_specific_files
1028
 
    # The parents up to the root of the paths we are searching.
1029
 
    # After all normal paths are returned, these specific items are returned.
1030
 
    cdef object search_specific_file_parents
1031
974
    cdef object state
1032
975
    # Current iteration variables:
1033
976
    cdef object current_root
1045
988
    cdef object current_block_list
1046
989
    cdef object current_dir_info
1047
990
    cdef object current_dir_list
1048
 
    cdef object _pending_consistent_entries # list
1049
991
    cdef int path_index
1050
992
    cdef object root_dir_info
1051
993
    cdef object bisect_left
1052
994
    cdef object pathjoin
1053
995
    cdef object fstat
1054
 
    # A set of the ids we've output when doing partial output.
1055
 
    cdef object seen_ids
1056
996
    cdef object sha_file
1057
997
 
1058
998
    def __init__(self, include_unchanged, use_filesystem_for_exec,
1059
999
        search_specific_files, state, source_index, target_index,
1060
1000
        want_unversioned, tree):
1061
 
        self.doing_consistency_expansion = 0
1062
1001
        self.old_dirname_to_file_id = {}
1063
1002
        self.new_dirname_to_file_id = {}
1064
 
        # Are we doing a partial iter_changes?
1065
 
        self.partial = set(['']).__ne__(search_specific_files)
 
1003
        # Just a sentry, so that _process_entry can say that this
 
1004
        # record is handled, but isn't interesting to process (unchanged)
 
1005
        self.uninteresting = object()
1066
1006
        # Using a list so that we can access the values and change them in
1067
1007
        # nested scope. Each one is [path, file_id, entry]
1068
1008
        self.last_source_parent = [None, None]
1069
1009
        self.last_target_parent = [None, None]
1070
 
        if include_unchanged is None:
1071
 
            self.include_unchanged = False
1072
 
        else:
1073
 
            self.include_unchanged = int(include_unchanged)
 
1010
        self.include_unchanged = include_unchanged
1074
1011
        self.use_filesystem_for_exec = use_filesystem_for_exec
1075
1012
        self.utf8_decode = cache_utf8._utf8_decode
1076
1013
        # for all search_indexs in each path at or under each element of
1077
 
        # search_specific_files, if the detail is relocated: add the id, and
1078
 
        # add the relocated path as one to search if it's not searched already.
1079
 
        # If the detail is not relocated, add the id.
 
1014
        # search_specific_files, if the detail is relocated: add the id, and add the
 
1015
        # relocated path as one to search if its not searched already. If the
 
1016
        # detail is not relocated, add the id.
1080
1017
        self.searched_specific_files = set()
1081
 
        # When we search exact paths without expanding downwards, we record
1082
 
        # that here.
1083
 
        self.searched_exact_paths = set()
1084
1018
        self.search_specific_files = search_specific_files
1085
 
        # The parents up to the root of the paths we are searching.
1086
 
        # After all normal paths are returned, these specific items are returned.
1087
 
        self.search_specific_file_parents = set()
1088
 
        # The ids we've sent out in the delta.
1089
 
        self.seen_ids = set()
1090
1019
        self.state = state
1091
1020
        self.current_root = None
1092
1021
        self.current_root_unicode = None
1108
1037
        self.current_block_pos = -1
1109
1038
        self.current_dir_info = None
1110
1039
        self.current_dir_list = None
1111
 
        self._pending_consistent_entries = []
1112
1040
        self.path_index = 0
1113
1041
        self.root_dir_info = None
1114
1042
        self.bisect_left = bisect.bisect_left
1115
1043
        self.pathjoin = osutils.pathjoin
1116
1044
        self.fstat = os.fstat
1117
1045
        self.sha_file = osutils.sha_file
1118
 
        if target_index != 0:
1119
 
            # A lot of code in here depends on target_index == 0
1120
 
            raise errors.BzrError('unsupported target index')
1121
1046
 
1122
1047
    cdef _process_entry(self, entry, path_info):
1123
1048
        """Compare an entry and real disk to generate delta information.
1124
1049
 
1125
1050
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1126
 
            the path of entry. If None, then the path is considered absent in 
1127
 
            the target (Perhaps we should pass in a concrete entry for this ?)
 
1051
            the path of entry. If None, then the path is considered absent.
 
1052
            (Perhaps we should pass in a concrete entry for this ?)
1128
1053
            Basename is returned as a utf8 string because we expect this
1129
1054
            tuple will be ignored, and don't want to take the time to
1130
1055
            decode.
1131
 
        :return: (iter_changes_result, changed). If the entry has not been
1132
 
            handled then changed is None. Otherwise it is False if no content
1133
 
            or metadata changes have occurred, and True if any content or
1134
 
            metadata change has occurred. If self.include_unchanged is True then
1135
 
            if changed is not None, iter_changes_result will always be a result
1136
 
            tuple. Otherwise, iter_changes_result is None unless changed is
1137
 
            True.
 
1056
        :return: None if these don't match
 
1057
                 A tuple of information about the change, or
 
1058
                 the object 'uninteresting' if these match, but are
 
1059
                 basically identical.
1138
1060
        """
1139
1061
        cdef char target_minikind
1140
1062
        cdef char source_minikind
1176
1098
            else:
1177
1099
                # add the source to the search path to find any children it
1178
1100
                # has.  TODO ? : only add if it is a container ?
1179
 
                if (not self.doing_consistency_expansion and 
1180
 
                    not osutils.is_inside_any(self.searched_specific_files,
1181
 
                                             source_details[1])):
 
1101
                if not osutils.is_inside_any(self.searched_specific_files,
 
1102
                                             source_details[1]):
1182
1103
                    self.search_specific_files.add(source_details[1])
1183
 
                    # expanding from a user requested path, parent expansion
1184
 
                    # for delta consistency happens later.
1185
1104
                # generate the old path; this is needed for stating later
1186
1105
                # as well.
1187
1106
                old_path = source_details[1]
1252
1171
                        content_change = 0
1253
1172
                    target_exec = False
1254
1173
                else:
1255
 
                    if path is None:
1256
 
                        path = self.pathjoin(old_dirname, old_basename)
1257
 
                    raise errors.BadFileKindError(path, path_info[2])
 
1174
                    raise Exception, "unknown kind %s" % path_info[2]
1258
1175
            if source_minikind == c'd':
1259
1176
                if path is None:
1260
1177
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1262
1179
                    file_id = entry[0][2]
1263
1180
                self.old_dirname_to_file_id[old_path] = file_id
1264
1181
            # parent id is the entry for the path in the target tree
1265
 
            if old_basename and old_dirname == self.last_source_parent[0]:
1266
 
                # use a cached hit for non-root source entries.
 
1182
            if old_dirname == self.last_source_parent[0]:
1267
1183
                source_parent_id = self.last_source_parent[1]
1268
1184
            else:
1269
1185
                try:
1270
1186
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
1271
 
                except KeyError, _:
 
1187
                except KeyError:
1272
1188
                    source_parent_entry = self.state._get_entry(self.source_index,
1273
1189
                                                           path_utf8=old_dirname)
1274
1190
                    source_parent_id = source_parent_entry[0][2]
1279
1195
                    self.last_source_parent[0] = old_dirname
1280
1196
                    self.last_source_parent[1] = source_parent_id
1281
1197
            new_dirname = entry[0][0]
1282
 
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
1283
 
                # use a cached hit for non-root target entries.
 
1198
            if new_dirname == self.last_target_parent[0]:
1284
1199
                target_parent_id = self.last_target_parent[1]
1285
1200
            else:
1286
1201
                try:
1287
1202
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
1288
 
                except KeyError, _:
 
1203
                except KeyError:
1289
1204
                    # TODO: We don't always need to do the lookup, because the
1290
1205
                    #       parent entry will be the same as the source entry.
1291
1206
                    target_parent_entry = self.state._get_entry(self.target_index,
1303
1218
                    self.last_target_parent[1] = target_parent_id
1304
1219
 
1305
1220
            source_exec = source_details[3]
1306
 
            changed = (content_change
 
1221
            if (self.include_unchanged
 
1222
                or content_change
1307
1223
                or source_parent_id != target_parent_id
1308
1224
                or old_basename != entry[0][1]
1309
1225
                or source_exec != target_exec
1310
 
                )
1311
 
            if not changed and not self.include_unchanged:
1312
 
                return None, False
1313
 
            else:
 
1226
                ):
1314
1227
                if old_path is None:
1315
1228
                    path = self.pathjoin(old_dirname, old_basename)
1316
1229
                    old_path = path
1330
1243
                       (source_parent_id, target_parent_id),
1331
1244
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1332
1245
                       (source_kind, target_kind),
1333
 
                       (source_exec, target_exec)), changed
 
1246
                       (source_exec, target_exec))
 
1247
            else:
 
1248
                return self.uninteresting
1334
1249
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1335
1250
            # looks like a new file
1336
1251
            path = self.pathjoin(entry[0][0], entry[0][1])
1363
1278
                       (None, parent_id),
1364
1279
                       (None, self.utf8_decode(entry[0][1])[0]),
1365
1280
                       (None, path_info[2]),
1366
 
                       (None, target_exec)), True
 
1281
                       (None, target_exec))
1367
1282
            else:
1368
1283
                # It's a missing file, report it as such.
1369
1284
                return (entry[0][2],
1373
1288
                       (None, parent_id),
1374
1289
                       (None, self.utf8_decode(entry[0][1])[0]),
1375
1290
                       (None, None),
1376
 
                       (None, False)), True
 
1291
                       (None, False))
1377
1292
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1378
1293
            # unversioned, possibly, or possibly not deleted: we don't care.
1379
1294
            # if it's still on disk, *and* there's no other entry at this
1391
1306
                   (parent_id, None),
1392
1307
                   (self.utf8_decode(entry[0][1])[0], None),
1393
1308
                   (_minikind_to_kind(source_minikind), None),
1394
 
                   (source_details[3], None)), True
 
1309
                   (source_details[3], None))
1395
1310
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1396
1311
            # a rename; could be a true rename, or a rename inherited from
1397
1312
            # a renamed parent. TODO: handle this efficiently. It's not
1398
1313
            # common case to rename dirs though, so a correct but slow
1399
1314
            # implementation will do.
1400
 
            if (not self.doing_consistency_expansion and 
1401
 
                not osutils.is_inside_any(self.searched_specific_files,
1402
 
                    target_details[1])):
 
1315
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1403
1316
                self.search_specific_files.add(target_details[1])
1404
 
                # We don't expand the specific files parents list here as
1405
 
                # the path is absent in target and won't create a delta with
1406
 
                # missing parent.
1407
1317
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1408
1318
              (target_minikind == c'r' or target_minikind == c'a')):
1409
1319
            # neither of the selected trees contain this path,
1415
1325
                "source_minikind=%r, target_minikind=%r"
1416
1326
                % (source_minikind, target_minikind))
1417
1327
            ## import pdb;pdb.set_trace()
1418
 
        return None, None
 
1328
        return None
1419
1329
 
1420
1330
    def __iter__(self):
1421
1331
        return self
1423
1333
    def iter_changes(self):
1424
1334
        return self
1425
1335
 
1426
 
    cdef int _gather_result_for_consistency(self, result) except -1:
1427
 
        """Check a result we will yield to make sure we are consistent later.
1428
 
        
1429
 
        This gathers result's parents into a set to output later.
1430
 
 
1431
 
        :param result: A result tuple.
1432
 
        """
1433
 
        if not self.partial or not result[0]:
1434
 
            return 0
1435
 
        self.seen_ids.add(result[0])
1436
 
        new_path = result[1][1]
1437
 
        if new_path:
1438
 
            # Not the root and not a delete: queue up the parents of the path.
1439
 
            self.search_specific_file_parents.update(
1440
 
                osutils.parent_directories(new_path.encode('utf8')))
1441
 
            # Add the root directory which parent_directories does not
1442
 
            # provide.
1443
 
            self.search_specific_file_parents.add('')
1444
 
        return 0
1445
 
 
1446
 
    cdef int _update_current_block(self) except -1:
 
1336
    cdef void _update_current_block(self):
1447
1337
        if (self.block_index < len(self.state._dirblocks) and
1448
1338
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1449
1339
            self.current_block = self.state._dirblocks[self.block_index]
1452
1342
        else:
1453
1343
            self.current_block = None
1454
1344
            self.current_block_list = None
1455
 
        return 0
1456
1345
 
1457
1346
    def __next__(self):
1458
1347
        # Simple thunk to allow tail recursion without pyrex confusion
1510
1399
        cdef char * current_dirname_c, * current_blockname_c
1511
1400
        cdef int advance_entry, advance_path
1512
1401
        cdef int path_handled
 
1402
        uninteresting = self.uninteresting
1513
1403
        searched_specific_files = self.searched_specific_files
1514
1404
        # Are we walking a root?
1515
1405
        while self.root_entries_pos < self.root_entries_len:
1516
1406
            entry = self.root_entries[self.root_entries_pos]
1517
1407
            self.root_entries_pos = self.root_entries_pos + 1
1518
 
            result, changed = self._process_entry(entry, self.root_dir_info)
1519
 
            if changed is not None:
1520
 
                if changed:
1521
 
                    self._gather_result_for_consistency(result)
1522
 
                if changed or self.include_unchanged:
1523
 
                    return result
 
1408
            result = self._process_entry(entry, self.root_dir_info)
 
1409
            if result is not None and result is not self.uninteresting:
 
1410
                return result
1524
1411
        # Have we finished the prior root, or never started one ?
1525
1412
        if self.current_root is None:
1526
1413
            # TODO: the pending list should be lexically sorted?  the
1527
1414
            # interface doesn't require it.
1528
1415
            try:
1529
1416
                self.current_root = self.search_specific_files.pop()
1530
 
            except KeyError, _:
 
1417
            except KeyError:
1531
1418
                raise StopIteration()
 
1419
            self.current_root_unicode = self.current_root.decode('utf8')
1532
1420
            self.searched_specific_files.add(self.current_root)
1533
1421
            # process the entries for this containing directory: the rest will be
1534
1422
            # found by their parents recursively.
1535
1423
            self.root_entries = self.state._entries_for_path(self.current_root)
1536
1424
            self.root_entries_len = len(self.root_entries)
1537
 
            self.current_root_unicode = self.current_root.decode('utf8')
1538
1425
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1539
1426
            try:
1540
1427
                root_stat = os.lstat(self.root_abspath)
1568
1455
            while self.root_entries_pos < self.root_entries_len:
1569
1456
                entry = self.root_entries[self.root_entries_pos]
1570
1457
                self.root_entries_pos = self.root_entries_pos + 1
1571
 
                result, changed = self._process_entry(entry, self.root_dir_info)
1572
 
                if changed is not None:
 
1458
                result = self._process_entry(entry, self.root_dir_info)
 
1459
                if result is not None:
1573
1460
                    path_handled = -1
1574
 
                    if changed:
1575
 
                        self._gather_result_for_consistency(result)
1576
 
                    if changed or self.include_unchanged:
 
1461
                    if result is not self.uninteresting:
1577
1462
                        return result
1578
1463
            # handle unversioned specified paths:
1579
1464
            if self.want_unversioned and not path_handled and self.root_dir_info:
1591
1476
                      )
1592
1477
            # If we reach here, the outer flow continues, which enters into the
1593
1478
            # per-root setup logic.
1594
 
        if (self.current_dir_info is None and self.current_block is None and not
1595
 
            self.doing_consistency_expansion):
 
1479
        if self.current_dir_info is None and self.current_block is None:
1596
1480
            # setup iteration of this root:
1597
1481
            self.current_dir_list = None
1598
1482
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1616
1500
                        #            and e.winerror == ERROR_DIRECTORY
1617
1501
                        try:
1618
1502
                            e_winerror = e.winerror
1619
 
                        except AttributeError, _:
 
1503
                        except AttributeError:
1620
1504
                            e_winerror = None
1621
1505
                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
1622
1506
                        if (e.errno in win_errors or e_winerror in win_errors):
1705
1589
                    try:
1706
1590
                        self.current_dir_info = self.dir_iterator.next()
1707
1591
                        self.current_dir_list = self.current_dir_info[1]
1708
 
                    except StopIteration, _:
 
1592
                    except StopIteration:
1709
1593
                        self.current_dir_info = None
1710
1594
                else: #(dircmp > 0)
1711
1595
                    # We have a dirblock entry for this location, but there
1720
1604
                        self.current_block_pos = self.current_block_pos + 1
1721
1605
                        # entry referring to file not present on disk.
1722
1606
                        # advance the entry only, after processing.
1723
 
                        result, changed = self._process_entry(current_entry, None)
1724
 
                        if changed is not None:
1725
 
                            if changed:
1726
 
                                self._gather_result_for_consistency(result)
1727
 
                            if changed or self.include_unchanged:
 
1607
                        result = self._process_entry(current_entry, None)
 
1608
                        if result is not None:
 
1609
                            if result is not self.uninteresting:
1728
1610
                                return result
1729
1611
                    self.block_index = self.block_index + 1
1730
1612
                    self._update_current_block()
1736
1618
            # More supplied paths to process
1737
1619
            self.current_root = None
1738
1620
            return self._iter_next()
1739
 
        # Start expanding more conservatively, adding paths the user may not
1740
 
        # have intended but required for consistent deltas.
1741
 
        self.doing_consistency_expansion = 1
1742
 
        if not self._pending_consistent_entries:
1743
 
            self._pending_consistent_entries = self._next_consistent_entries()
1744
 
        while self._pending_consistent_entries:
1745
 
            result, changed = self._pending_consistent_entries.pop()
1746
 
            if changed is not None:
1747
 
                return result
1748
1621
        raise StopIteration()
1749
1622
 
1750
1623
    cdef object _maybe_tree_ref(self, current_path_info):
1800
1673
                    pass
1801
1674
                elif current_path_info is None:
1802
1675
                    # no path is fine: the per entry code will handle it.
1803
 
                    result, changed = self._process_entry(current_entry,
1804
 
                        current_path_info)
 
1676
                    result = self._process_entry(current_entry, current_path_info)
 
1677
                    if result is not None:
 
1678
                        if result is self.uninteresting:
 
1679
                            result = None
1805
1680
                else:
1806
1681
                    minikind = _minikind_from_string(
1807
1682
                        current_entry[1][self.target_index][0])
1822
1697
                        else:
1823
1698
                            # entry referring to file not present on disk.
1824
1699
                            # advance the entry only, after processing.
1825
 
                            result, changed = self._process_entry(current_entry,
1826
 
                                None)
 
1700
                            result = self._process_entry(current_entry, None)
 
1701
                            if result is not None:
 
1702
                                if result is self.uninteresting:
 
1703
                                    result = None
1827
1704
                            advance_path = 0
1828
1705
                    else:
1829
1706
                        # paths are the same, and the dirstate entry is not
1830
1707
                        # absent or renamed.
1831
 
                        result, changed = self._process_entry(current_entry,
1832
 
                            current_path_info)
1833
 
                        if changed is not None:
 
1708
                        result = self._process_entry(current_entry, current_path_info)
 
1709
                        if result is not None:
1834
1710
                            path_handled = -1
1835
 
                            if not changed and not self.include_unchanged:
1836
 
                                changed = None
 
1711
                            if result is self.uninteresting:
 
1712
                                result = None
1837
1713
                # >- loop control starts here:
1838
1714
                # >- entry
1839
1715
                if advance_entry and current_entry is not None:
1852
1728
                                and stat.S_IEXEC & current_path_info[3].st_mode)
1853
1729
                            try:
1854
1730
                                relpath_unicode = self.utf8_decode(current_path_info[0])[0]
1855
 
                            except UnicodeDecodeError, _:
 
1731
                            except UnicodeDecodeError:
1856
1732
                                raise errors.BadFilenameEncoding(
1857
1733
                                    current_path_info[0], osutils._fs_enc)
1858
 
                            if changed is not None:
 
1734
                            if result is not None:
1859
1735
                                raise AssertionError(
1860
1736
                                    "result is not None: %r" % result)
1861
1737
                            result = (None,
1866
1742
                                (None, self.utf8_decode(current_path_info[1])[0]),
1867
1743
                                (None, current_path_info[2]),
1868
1744
                                (None, new_executable))
1869
 
                            changed = True
1870
1745
                        # don't descend into this unversioned path if it is
1871
1746
                        # a dir
1872
1747
                        if current_path_info[2] in ('directory'):
1885
1760
                                current_path_info)
1886
1761
                    else:
1887
1762
                        current_path_info = None
1888
 
                if changed is not None:
 
1763
                if result is not None:
1889
1764
                    # Found a result on this pass, yield it
1890
 
                    if changed:
1891
 
                        self._gather_result_for_consistency(result)
1892
 
                    if changed or self.include_unchanged:
1893
 
                        return result
 
1765
                    return result
1894
1766
            if self.current_block is not None:
1895
1767
                self.block_index = self.block_index + 1
1896
1768
                self._update_current_block()
1900
1772
                try:
1901
1773
                    self.current_dir_info = self.dir_iterator.next()
1902
1774
                    self.current_dir_list = self.current_dir_info[1]
1903
 
                except StopIteration, _:
 
1775
                except StopIteration:
1904
1776
                    self.current_dir_info = None
1905
 
 
1906
 
    cdef object _next_consistent_entries(self):
1907
 
        """Grabs the next specific file parent case to consider.
1908
 
        
1909
 
        :return: A list of the results, each of which is as for _process_entry.
1910
 
        """
1911
 
        results = []
1912
 
        while self.search_specific_file_parents:
1913
 
            # Process the parent directories for the paths we were iterating.
1914
 
            # Even in extremely large trees this should be modest, so currently
1915
 
            # no attempt is made to optimise.
1916
 
            path_utf8 = self.search_specific_file_parents.pop()
1917
 
            if path_utf8 in self.searched_exact_paths:
1918
 
                # We've examined this path.
1919
 
                continue
1920
 
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1921
 
                # We've examined this path.
1922
 
                continue
1923
 
            path_entries = self.state._entries_for_path(path_utf8)
1924
 
            # We need either one or two entries. If the path in
1925
 
            # self.target_index has moved (so the entry in source_index is in
1926
 
            # 'ar') then we need to also look for the entry for this path in
1927
 
            # self.source_index, to output the appropriate delete-or-rename.
1928
 
            selected_entries = []
1929
 
            found_item = False
1930
 
            for candidate_entry in path_entries:
1931
 
                # Find entries present in target at this path:
1932
 
                if candidate_entry[1][self.target_index][0] not in 'ar':
1933
 
                    found_item = True
1934
 
                    selected_entries.append(candidate_entry)
1935
 
                # Find entries present in source at this path:
1936
 
                elif (self.source_index is not None and
1937
 
                    candidate_entry[1][self.source_index][0] not in 'ar'):
1938
 
                    found_item = True
1939
 
                    if candidate_entry[1][self.target_index][0] == 'a':
1940
 
                        # Deleted, emit it here.
1941
 
                        selected_entries.append(candidate_entry)
1942
 
                    else:
1943
 
                        # renamed, emit it when we process the directory it
1944
 
                        # ended up at.
1945
 
                        self.search_specific_file_parents.add(
1946
 
                            candidate_entry[1][self.target_index][1])
1947
 
            if not found_item:
1948
 
                raise AssertionError(
1949
 
                    "Missing entry for specific path parent %r, %r" % (
1950
 
                    path_utf8, path_entries))
1951
 
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1952
 
            for entry in selected_entries:
1953
 
                if entry[0][2] in self.seen_ids:
1954
 
                    continue
1955
 
                result, changed = self._process_entry(entry, path_info)
1956
 
                if changed is None:
1957
 
                    raise AssertionError(
1958
 
                        "Got entry<->path mismatch for specific path "
1959
 
                        "%r entry %r path_info %r " % (
1960
 
                        path_utf8, entry, path_info))
1961
 
                # Only include changes - we're outside the users requested
1962
 
                # expansion.
1963
 
                if changed:
1964
 
                    self._gather_result_for_consistency(result)
1965
 
                    if (result[6][0] == 'directory' and
1966
 
                        result[6][1] != 'directory'):
1967
 
                        # This stopped being a directory, the old children have
1968
 
                        # to be included.
1969
 
                        if entry[1][self.source_index][0] == 'r':
1970
 
                            # renamed, take the source path
1971
 
                            entry_path_utf8 = entry[1][self.source_index][1]
1972
 
                        else:
1973
 
                            entry_path_utf8 = path_utf8
1974
 
                        initial_key = (entry_path_utf8, '', '')
1975
 
                        block_index, _ = self.state._find_block_index_from_key(
1976
 
                            initial_key)
1977
 
                        if block_index == 0:
1978
 
                            # The children of the root are in block index 1.
1979
 
                            block_index = block_index + 1
1980
 
                        current_block = None
1981
 
                        if block_index < len(self.state._dirblocks):
1982
 
                            current_block = self.state._dirblocks[block_index]
1983
 
                            if not osutils.is_inside(
1984
 
                                entry_path_utf8, current_block[0]):
1985
 
                                # No entries for this directory at all.
1986
 
                                current_block = None
1987
 
                        if current_block is not None:
1988
 
                            for entry in current_block[1]:
1989
 
                                if entry[1][self.source_index][0] in 'ar':
1990
 
                                    # Not in the source tree, so doesn't have to be
1991
 
                                    # included.
1992
 
                                    continue
1993
 
                                # Path of the entry itself.
1994
 
                                self.search_specific_file_parents.add(
1995
 
                                    self.pathjoin(*entry[0][:2]))
1996
 
                if changed or self.include_unchanged:
1997
 
                    results.append((result, changed))
1998
 
            self.searched_exact_paths.add(path_utf8)
1999
 
        return results
2000
 
 
2001
 
    cdef object _path_info(self, utf8_path, unicode_path):
2002
 
        """Generate path_info for unicode_path.
2003
 
 
2004
 
        :return: None if unicode_path does not exist, or a path_info tuple.
2005
 
        """
2006
 
        abspath = self.tree.abspath(unicode_path)
2007
 
        try:
2008
 
            stat = os.lstat(abspath)
2009
 
        except OSError, e:
2010
 
            if e.errno == errno.ENOENT:
2011
 
                # the path does not exist.
2012
 
                return None
2013
 
            else:
2014
 
                raise
2015
 
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
2016
 
        dir_info = (utf8_path, utf8_basename,
2017
 
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
2018
 
            abspath)
2019
 
        if dir_info[2] == 'directory':
2020
 
            if self.tree._directory_is_tree_reference(
2021
 
                unicode_path):
2022
 
                self.root_dir_info = self.root_dir_info[:2] + \
2023
 
                    ('tree-reference',) + self.root_dir_info[3:]
2024
 
        return dir_info