~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_pyx.pyx

  • Committer: Martin
  • Date: 2010-05-16 15:18:43 UTC
  • mfrom: (5235 +trunk)
  • mto: This revision was merged to the branch mainline in revision 5239.
  • Revision ID: gzlist@googlemail.com-20100516151843-lu53u7caehm3ie3i
Merge bzr.dev to resolve conflicts in NEWS and _chk_map_pyx

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2007, 2008, 2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
28
28
 
29
29
from bzrlib import cache_utf8, errors, osutils
30
30
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import pathjoin, splitpath
 
31
from bzrlib.osutils import parent_directories, pathjoin, splitpath
32
32
 
33
33
 
34
34
# This is the Windows equivalent of ENOTDIR
54
54
cdef extern from *:
55
55
    ctypedef unsigned long size_t
56
56
 
57
 
cdef extern from "_dirstate_helpers_c.h":
 
57
cdef extern from "_dirstate_helpers_pyx.h":
58
58
    ctypedef int intptr_t
59
59
 
60
60
 
119
119
    # void *memrchr(void *s, int c, size_t len)
120
120
 
121
121
 
122
 
cdef void* _my_memrchr(void *s, int c, size_t n):
 
122
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
123
123
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
124
124
    cdef char *pos
125
125
    cdef char *start
156
156
        return None
157
157
    return <char*>found - <char*>_s
158
158
 
 
159
 
159
160
cdef object safe_string_from_size(char *s, Py_ssize_t size):
160
161
    if size < 0:
161
 
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
162
162
        raise AssertionError(
163
 
            'tried to create a string with an invalid size: %d @0x%x'
164
 
            % (size, <int>s))
 
163
            'tried to create a string with an invalid size: %d'
 
164
            % (size))
165
165
    return PyString_FromStringAndSize(s, size)
166
166
 
167
167
 
168
 
cdef int _is_aligned(void *ptr):
 
168
cdef int _is_aligned(void *ptr): # cannot_raise
169
169
    """Is this pointer aligned to an integer size offset?
170
170
 
171
171
    :return: 1 if this pointer is aligned, 0 otherwise.
173
173
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
174
174
 
175
175
 
176
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
 
176
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
177
177
    cdef unsigned char *cur1
178
178
    cdef unsigned char *cur2
179
179
    cdef unsigned char *end1
237
237
    return 0
238
238
 
239
239
 
240
 
def cmp_by_dirs_c(path1, path2):
 
240
def cmp_by_dirs(path1, path2):
241
241
    """Compare two paths directory by directory.
242
242
 
243
243
    This is equivalent to doing::
266
266
                        PyString_Size(path2))
267
267
 
268
268
 
269
 
def _cmp_path_by_dirblock_c(path1, path2):
 
269
def _cmp_path_by_dirblock(path1, path2):
270
270
    """Compare two paths based on what directory they are in.
271
271
 
272
272
    This generates a sort order, such that all children of a directory are
288
288
    if not PyString_CheckExact(path2):
289
289
        raise TypeError("'path2' must be a plain string, not %s: %r"
290
290
                        % (type(path2), path2))
291
 
    return _cmp_path_by_dirblock(PyString_AsString(path1),
292
 
                                 PyString_Size(path1),
293
 
                                 PyString_AsString(path2),
294
 
                                 PyString_Size(path2))
295
 
 
296
 
 
297
 
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
 
                               char *path2, int path2_len):
 
291
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
 
292
                                        PyString_Size(path1),
 
293
                                        PyString_AsString(path2),
 
294
                                        PyString_Size(path2))
 
295
 
 
296
 
 
297
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
 
298
                                      char *path2, int path2_len): # cannot_raise
299
299
    """Compare two paths by what directory they are in.
300
300
 
301
 
    see ``_cmp_path_by_dirblock_c`` for details.
 
301
    see ``_cmp_path_by_dirblock`` for details.
302
302
    """
303
303
    cdef char *dirname1
304
304
    cdef int dirname1_len
368
368
    return 1
369
369
 
370
370
 
371
 
def _bisect_path_left_c(paths, path):
 
371
def _bisect_path_left(paths, path):
372
372
    """Return the index where to insert path into paths.
373
373
 
374
374
    This uses a path-wise comparison so we get::
413
413
        cur = PyList_GetItem_object_void(paths, _mid)
414
414
        cur_cstr = PyString_AS_STRING_void(cur)
415
415
        cur_size = PyString_GET_SIZE_void(cur)
416
 
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
 
416
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
 
417
                                        path_cstr, path_size) < 0:
417
418
            _lo = _mid + 1
418
419
        else:
419
420
            _hi = _mid
420
421
    return _lo
421
422
 
422
423
 
423
 
def _bisect_path_right_c(paths, path):
 
424
def _bisect_path_right(paths, path):
424
425
    """Return the index where to insert path into paths.
425
426
 
426
427
    This uses a path-wise comparison so we get::
465
466
        cur = PyList_GetItem_object_void(paths, _mid)
466
467
        cur_cstr = PyString_AS_STRING_void(cur)
467
468
        cur_size = PyString_GET_SIZE_void(cur)
468
 
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
 
469
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
 
470
                                        cur_cstr, cur_size) < 0:
469
471
            _hi = _mid
470
472
        else:
471
473
            _lo = _mid + 1
472
474
    return _lo
473
475
 
474
476
 
475
 
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
 
477
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
476
478
    """Return the index where to insert dirname into the dirblocks.
477
479
 
478
480
    The return value idx is such that all directory blocks in dirblock[:idx]
744
746
        self.state._split_root_dirblock_into_contents()
745
747
 
746
748
 
747
 
def _read_dirblocks_c(state):
 
749
def _read_dirblocks(state):
748
750
    """Read in the dirblocks for the given DirState object.
749
751
 
750
752
    This is tightly bound to the DirState internal representation. It should be
766
768
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
767
769
 
768
770
 
769
 
cdef int minikind_from_mode(int mode):
 
771
cdef int minikind_from_mode(int mode): # cannot_raise
770
772
    # in order of frequency:
771
773
    if S_ISREG(mode):
772
774
        return c"f"
913
915
    return link_or_sha1
914
916
 
915
917
 
916
 
cdef char _minikind_from_string(object string):
 
918
# TODO: Do we want to worry about exceptions here?
 
919
cdef char _minikind_from_string(object string) except? -1:
917
920
    """Convert a python string to a char."""
918
921
    return PyString_AsString(string)[0]
919
922
 
951
954
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
952
955
 
953
956
 
954
 
cdef int _versioned_minikind(char minikind):
 
957
cdef int _versioned_minikind(char minikind): # cannot_raise
955
958
    """Return non-zero if minikind is in fltd"""
956
959
    return (minikind == c'f' or
957
960
            minikind == c'd' or
961
964
 
962
965
cdef class ProcessEntryC:
963
966
 
 
967
    cdef int doing_consistency_expansion
964
968
    cdef object old_dirname_to_file_id # dict
965
969
    cdef object new_dirname_to_file_id # dict
966
 
    cdef readonly object uninteresting
967
970
    cdef object last_source_parent
968
971
    cdef object last_target_parent
969
 
    cdef object include_unchanged
 
972
    cdef int include_unchanged
 
973
    cdef int partial
970
974
    cdef object use_filesystem_for_exec
971
975
    cdef object utf8_decode
972
976
    cdef readonly object searched_specific_files
 
977
    cdef readonly object searched_exact_paths
973
978
    cdef object search_specific_files
 
979
    # The parents up to the root of the paths we are searching.
 
980
    # After all normal paths are returned, these specific items are returned.
 
981
    cdef object search_specific_file_parents
974
982
    cdef object state
975
983
    # Current iteration variables:
976
984
    cdef object current_root
988
996
    cdef object current_block_list
989
997
    cdef object current_dir_info
990
998
    cdef object current_dir_list
 
999
    cdef object _pending_consistent_entries # list
991
1000
    cdef int path_index
992
1001
    cdef object root_dir_info
993
1002
    cdef object bisect_left
994
1003
    cdef object pathjoin
995
1004
    cdef object fstat
 
1005
    # A set of the ids we've output when doing partial output.
 
1006
    cdef object seen_ids
996
1007
    cdef object sha_file
997
1008
 
998
1009
    def __init__(self, include_unchanged, use_filesystem_for_exec,
999
1010
        search_specific_files, state, source_index, target_index,
1000
1011
        want_unversioned, tree):
 
1012
        self.doing_consistency_expansion = 0
1001
1013
        self.old_dirname_to_file_id = {}
1002
1014
        self.new_dirname_to_file_id = {}
1003
 
        # Just a sentry, so that _process_entry can say that this
1004
 
        # record is handled, but isn't interesting to process (unchanged)
1005
 
        self.uninteresting = object()
 
1015
        # Are we doing a partial iter_changes?
 
1016
        self.partial = set(['']).__ne__(search_specific_files)
1006
1017
        # Using a list so that we can access the values and change them in
1007
1018
        # nested scope. Each one is [path, file_id, entry]
1008
1019
        self.last_source_parent = [None, None]
1009
1020
        self.last_target_parent = [None, None]
1010
 
        self.include_unchanged = include_unchanged
 
1021
        if include_unchanged is None:
 
1022
            self.include_unchanged = False
 
1023
        else:
 
1024
            self.include_unchanged = int(include_unchanged)
1011
1025
        self.use_filesystem_for_exec = use_filesystem_for_exec
1012
1026
        self.utf8_decode = cache_utf8._utf8_decode
1013
1027
        # for all search_indexs in each path at or under each element of
1014
 
        # search_specific_files, if the detail is relocated: add the id, and add the
1015
 
        # relocated path as one to search if its not searched already. If the
1016
 
        # detail is not relocated, add the id.
 
1028
        # search_specific_files, if the detail is relocated: add the id, and
 
1029
        # add the relocated path as one to search if its not searched already.
 
1030
        # If the detail is not relocated, add the id.
1017
1031
        self.searched_specific_files = set()
 
1032
        # When we search exact paths without expanding downwards, we record
 
1033
        # that here.
 
1034
        self.searched_exact_paths = set()
1018
1035
        self.search_specific_files = search_specific_files
 
1036
        # The parents up to the root of the paths we are searching.
 
1037
        # After all normal paths are returned, these specific items are returned.
 
1038
        self.search_specific_file_parents = set()
 
1039
        # The ids we've sent out in the delta.
 
1040
        self.seen_ids = set()
1019
1041
        self.state = state
1020
1042
        self.current_root = None
1021
1043
        self.current_root_unicode = None
1037
1059
        self.current_block_pos = -1
1038
1060
        self.current_dir_info = None
1039
1061
        self.current_dir_list = None
 
1062
        self._pending_consistent_entries = []
1040
1063
        self.path_index = 0
1041
1064
        self.root_dir_info = None
1042
1065
        self.bisect_left = bisect.bisect_left
1043
1066
        self.pathjoin = osutils.pathjoin
1044
1067
        self.fstat = os.fstat
1045
1068
        self.sha_file = osutils.sha_file
 
1069
        if target_index != 0:
 
1070
            # A lot of code in here depends on target_index == 0
 
1071
            raise errors.BzrError('unsupported target index')
1046
1072
 
1047
1073
    cdef _process_entry(self, entry, path_info):
1048
1074
        """Compare an entry and real disk to generate delta information.
1049
1075
 
1050
1076
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1051
 
            the path of entry. If None, then the path is considered absent.
1052
 
            (Perhaps we should pass in a concrete entry for this ?)
 
1077
            the path of entry. If None, then the path is considered absent in 
 
1078
            the target (Perhaps we should pass in a concrete entry for this ?)
1053
1079
            Basename is returned as a utf8 string because we expect this
1054
1080
            tuple will be ignored, and don't want to take the time to
1055
1081
            decode.
1056
 
        :return: None if the these don't match
1057
 
                 A tuple of information about the change, or
1058
 
                 the object 'uninteresting' if these match, but are
1059
 
                 basically identical.
 
1082
        :return: (iter_changes_result, changed). If the entry has not been
 
1083
            handled then changed is None. Otherwise it is False if no content
 
1084
            or metadata changes have occurred, and True if any content or
 
1085
            metadata change has occurred. If self.include_unchanged is True then
 
1086
            if changed is not None, iter_changes_result will always be a result
 
1087
            tuple. Otherwise, iter_changes_result is None unless changed is
 
1088
            True.
1060
1089
        """
1061
1090
        cdef char target_minikind
1062
1091
        cdef char source_minikind
1098
1127
            else:
1099
1128
                # add the source to the search path to find any children it
1100
1129
                # has.  TODO ? : only add if it is a container ?
1101
 
                if not osutils.is_inside_any(self.searched_specific_files,
1102
 
                                             source_details[1]):
 
1130
                if (not self.doing_consistency_expansion and 
 
1131
                    not osutils.is_inside_any(self.searched_specific_files,
 
1132
                                             source_details[1])):
1103
1133
                    self.search_specific_files.add(source_details[1])
 
1134
                    # expanding from a user requested path, parent expansion
 
1135
                    # for delta consistency happens later.
1104
1136
                # generate the old path; this is needed for stating later
1105
1137
                # as well.
1106
1138
                old_path = source_details[1]
1140
1172
                    if source_minikind != c'f':
1141
1173
                        content_change = 1
1142
1174
                    else:
1143
 
                        # If the size is the same, check the sha:
1144
 
                        if target_details[2] == source_details[2]:
1145
 
                            if link_or_sha1 is None:
1146
 
                                # Stat cache miss:
1147
 
                                statvalue, link_or_sha1 = \
1148
 
                                    self.state._sha1_provider.stat_and_sha1(
1149
 
                                    path_info[4])
1150
 
                                self.state._observed_sha1(entry, link_or_sha1,
1151
 
                                    statvalue)
1152
 
                            content_change = (link_or_sha1 != source_details[1])
1153
 
                        else:
1154
 
                            # Size changed, so must be different
1155
 
                            content_change = 1
 
1175
                        # Check the sha. We can't just rely on the size as
 
1176
                        # content filtering may mean different sizes actually
 
1177
                        # map to the same content
 
1178
                        if link_or_sha1 is None:
 
1179
                            # Stat cache miss:
 
1180
                            statvalue, link_or_sha1 = \
 
1181
                                self.state._sha1_provider.stat_and_sha1(
 
1182
                                path_info[4])
 
1183
                            self.state._observed_sha1(entry, link_or_sha1,
 
1184
                                statvalue)
 
1185
                        content_change = (link_or_sha1 != source_details[1])
1156
1186
                    # Target details is updated at update_entry time
1157
1187
                    if self.use_filesystem_for_exec:
1158
1188
                        # We don't need S_ISREG here, because we are sure
1173
1203
                        content_change = 0
1174
1204
                    target_exec = False
1175
1205
                else:
1176
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
1206
                    if path is None:
 
1207
                        path = self.pathjoin(old_dirname, old_basename)
 
1208
                    raise errors.BadFileKindError(path, path_info[2])
1177
1209
            if source_minikind == c'd':
1178
1210
                if path is None:
1179
1211
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1181
1213
                    file_id = entry[0][2]
1182
1214
                self.old_dirname_to_file_id[old_path] = file_id
1183
1215
            # parent id is the entry for the path in the target tree
1184
 
            if old_dirname == self.last_source_parent[0]:
 
1216
            if old_basename and old_dirname == self.last_source_parent[0]:
 
1217
                # use a cached hit for non-root source entries.
1185
1218
                source_parent_id = self.last_source_parent[1]
1186
1219
            else:
1187
1220
                try:
1197
1230
                    self.last_source_parent[0] = old_dirname
1198
1231
                    self.last_source_parent[1] = source_parent_id
1199
1232
            new_dirname = entry[0][0]
1200
 
            if new_dirname == self.last_target_parent[0]:
 
1233
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
1234
                # use a cached hit for non-root target entries.
1201
1235
                target_parent_id = self.last_target_parent[1]
1202
1236
            else:
1203
1237
                try:
1220
1254
                    self.last_target_parent[1] = target_parent_id
1221
1255
 
1222
1256
            source_exec = source_details[3]
1223
 
            if (self.include_unchanged
1224
 
                or content_change
 
1257
            changed = (content_change
1225
1258
                or source_parent_id != target_parent_id
1226
1259
                or old_basename != entry[0][1]
1227
1260
                or source_exec != target_exec
1228
 
                ):
 
1261
                )
 
1262
            if not changed and not self.include_unchanged:
 
1263
                return None, False
 
1264
            else:
1229
1265
                if old_path is None:
1230
1266
                    path = self.pathjoin(old_dirname, old_basename)
1231
1267
                    old_path = path
1245
1281
                       (source_parent_id, target_parent_id),
1246
1282
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1247
1283
                       (source_kind, target_kind),
1248
 
                       (source_exec, target_exec))
1249
 
            else:
1250
 
                return self.uninteresting
 
1284
                       (source_exec, target_exec)), changed
1251
1285
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1252
1286
            # looks like a new file
1253
1287
            path = self.pathjoin(entry[0][0], entry[0][1])
1280
1314
                       (None, parent_id),
1281
1315
                       (None, self.utf8_decode(entry[0][1])[0]),
1282
1316
                       (None, path_info[2]),
1283
 
                       (None, target_exec))
 
1317
                       (None, target_exec)), True
1284
1318
            else:
1285
1319
                # It's a missing file, report it as such.
1286
1320
                return (entry[0][2],
1290
1324
                       (None, parent_id),
1291
1325
                       (None, self.utf8_decode(entry[0][1])[0]),
1292
1326
                       (None, None),
1293
 
                       (None, False))
 
1327
                       (None, False)), True
1294
1328
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1295
1329
            # unversioned, possibly, or possibly not deleted: we don't care.
1296
1330
            # if its still on disk, *and* theres no other entry at this
1308
1342
                   (parent_id, None),
1309
1343
                   (self.utf8_decode(entry[0][1])[0], None),
1310
1344
                   (_minikind_to_kind(source_minikind), None),
1311
 
                   (source_details[3], None))
 
1345
                   (source_details[3], None)), True
1312
1346
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1313
1347
            # a rename; could be a true rename, or a rename inherited from
1314
1348
            # a renamed parent. TODO: handle this efficiently. Its not
1315
1349
            # common case to rename dirs though, so a correct but slow
1316
1350
            # implementation will do.
1317
 
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
1351
            if (not self.doing_consistency_expansion and 
 
1352
                not osutils.is_inside_any(self.searched_specific_files,
 
1353
                    target_details[1])):
1318
1354
                self.search_specific_files.add(target_details[1])
 
1355
                # We don't expand the specific files parents list here as
 
1356
                # the path is absent in target and won't create a delta with
 
1357
                # missing parent.
1319
1358
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1320
1359
              (target_minikind == c'r' or target_minikind == c'a')):
1321
1360
            # neither of the selected trees contain this path,
1327
1366
                "source_minikind=%r, target_minikind=%r"
1328
1367
                % (source_minikind, target_minikind))
1329
1368
            ## import pdb;pdb.set_trace()
1330
 
        return None
 
1369
        return None, None
1331
1370
 
1332
1371
    def __iter__(self):
1333
1372
        return self
1335
1374
    def iter_changes(self):
1336
1375
        return self
1337
1376
 
1338
 
    cdef void _update_current_block(self):
 
1377
    cdef int _gather_result_for_consistency(self, result) except -1:
 
1378
        """Check a result we will yield to make sure we are consistent later.
 
1379
        
 
1380
        This gathers result's parents into a set to output later.
 
1381
 
 
1382
        :param result: A result tuple.
 
1383
        """
 
1384
        if not self.partial or not result[0]:
 
1385
            return 0
 
1386
        self.seen_ids.add(result[0])
 
1387
        new_path = result[1][1]
 
1388
        if new_path:
 
1389
            # Not the root and not a delete: queue up the parents of the path.
 
1390
            self.search_specific_file_parents.update(
 
1391
                osutils.parent_directories(new_path.encode('utf8')))
 
1392
            # Add the root directory which parent_directories does not
 
1393
            # provide.
 
1394
            self.search_specific_file_parents.add('')
 
1395
        return 0
 
1396
 
 
1397
    cdef int _update_current_block(self) except -1:
1339
1398
        if (self.block_index < len(self.state._dirblocks) and
1340
1399
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1341
1400
            self.current_block = self.state._dirblocks[self.block_index]
1344
1403
        else:
1345
1404
            self.current_block = None
1346
1405
            self.current_block_list = None
 
1406
        return 0
1347
1407
 
1348
1408
    def __next__(self):
1349
1409
        # Simple thunk to allow tail recursion without pyrex confusion
1401
1461
        cdef char * current_dirname_c, * current_blockname_c
1402
1462
        cdef int advance_entry, advance_path
1403
1463
        cdef int path_handled
1404
 
        uninteresting = self.uninteresting
1405
1464
        searched_specific_files = self.searched_specific_files
1406
1465
        # Are we walking a root?
1407
1466
        while self.root_entries_pos < self.root_entries_len:
1408
1467
            entry = self.root_entries[self.root_entries_pos]
1409
1468
            self.root_entries_pos = self.root_entries_pos + 1
1410
 
            result = self._process_entry(entry, self.root_dir_info)
1411
 
            if result is not None and result is not self.uninteresting:
1412
 
                return result
 
1469
            result, changed = self._process_entry(entry, self.root_dir_info)
 
1470
            if changed is not None:
 
1471
                if changed:
 
1472
                    self._gather_result_for_consistency(result)
 
1473
                if changed or self.include_unchanged:
 
1474
                    return result
1413
1475
        # Have we finished the prior root, or never started one ?
1414
1476
        if self.current_root is None:
1415
1477
            # TODO: the pending list should be lexically sorted?  the
1418
1480
                self.current_root = self.search_specific_files.pop()
1419
1481
            except KeyError:
1420
1482
                raise StopIteration()
1421
 
            self.current_root_unicode = self.current_root.decode('utf8')
1422
1483
            self.searched_specific_files.add(self.current_root)
1423
1484
            # process the entries for this containing directory: the rest will be
1424
1485
            # found by their parents recursively.
1425
1486
            self.root_entries = self.state._entries_for_path(self.current_root)
1426
1487
            self.root_entries_len = len(self.root_entries)
 
1488
            self.current_root_unicode = self.current_root.decode('utf8')
1427
1489
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1428
1490
            try:
1429
1491
                root_stat = os.lstat(self.root_abspath)
1457
1519
            while self.root_entries_pos < self.root_entries_len:
1458
1520
                entry = self.root_entries[self.root_entries_pos]
1459
1521
                self.root_entries_pos = self.root_entries_pos + 1
1460
 
                result = self._process_entry(entry, self.root_dir_info)
1461
 
                if result is not None:
 
1522
                result, changed = self._process_entry(entry, self.root_dir_info)
 
1523
                if changed is not None:
1462
1524
                    path_handled = -1
1463
 
                    if result is not self.uninteresting:
 
1525
                    if changed:
 
1526
                        self._gather_result_for_consistency(result)
 
1527
                    if changed or self.include_unchanged:
1464
1528
                        return result
1465
1529
            # handle unversioned specified paths:
1466
1530
            if self.want_unversioned and not path_handled and self.root_dir_info:
1478
1542
                      )
1479
1543
            # If we reach here, the outer flow continues, which enters into the
1480
1544
            # per-root setup logic.
1481
 
        if self.current_dir_info is None and self.current_block is None:
 
1545
        if (self.current_dir_info is None and self.current_block is None and not
 
1546
            self.doing_consistency_expansion):
1482
1547
            # setup iteration of this root:
1483
1548
            self.current_dir_list = None
1484
1549
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1606
1671
                        self.current_block_pos = self.current_block_pos + 1
1607
1672
                        # entry referring to file not present on disk.
1608
1673
                        # advance the entry only, after processing.
1609
 
                        result = self._process_entry(current_entry, None)
1610
 
                        if result is not None:
1611
 
                            if result is not self.uninteresting:
 
1674
                        result, changed = self._process_entry(current_entry, None)
 
1675
                        if changed is not None:
 
1676
                            if changed:
 
1677
                                self._gather_result_for_consistency(result)
 
1678
                            if changed or self.include_unchanged:
1612
1679
                                return result
1613
1680
                    self.block_index = self.block_index + 1
1614
1681
                    self._update_current_block()
1620
1687
            # More supplied paths to process
1621
1688
            self.current_root = None
1622
1689
            return self._iter_next()
 
1690
        # Start expanding more conservatively, adding paths the user may not
 
1691
        # have intended but required for consistent deltas.
 
1692
        self.doing_consistency_expansion = 1
 
1693
        if not self._pending_consistent_entries:
 
1694
            self._pending_consistent_entries = self._next_consistent_entries()
 
1695
        while self._pending_consistent_entries:
 
1696
            result, changed = self._pending_consistent_entries.pop()
 
1697
            if changed is not None:
 
1698
                return result
1623
1699
        raise StopIteration()
1624
1700
 
1625
1701
    cdef object _maybe_tree_ref(self, current_path_info):
1675
1751
                    pass
1676
1752
                elif current_path_info is None:
1677
1753
                    # no path is fine: the per entry code will handle it.
1678
 
                    result = self._process_entry(current_entry, current_path_info)
1679
 
                    if result is not None:
1680
 
                        if result is self.uninteresting:
1681
 
                            result = None
 
1754
                    result, changed = self._process_entry(current_entry,
 
1755
                        current_path_info)
1682
1756
                else:
1683
1757
                    minikind = _minikind_from_string(
1684
1758
                        current_entry[1][self.target_index][0])
1699
1773
                        else:
1700
1774
                            # entry referring to file not present on disk.
1701
1775
                            # advance the entry only, after processing.
1702
 
                            result = self._process_entry(current_entry, None)
1703
 
                            if result is not None:
1704
 
                                if result is self.uninteresting:
1705
 
                                    result = None
 
1776
                            result, changed = self._process_entry(current_entry,
 
1777
                                None)
1706
1778
                            advance_path = 0
1707
1779
                    else:
1708
1780
                        # paths are the same,and the dirstate entry is not
1709
1781
                        # absent or renamed.
1710
 
                        result = self._process_entry(current_entry, current_path_info)
1711
 
                        if result is not None:
 
1782
                        result, changed = self._process_entry(current_entry,
 
1783
                            current_path_info)
 
1784
                        if changed is not None:
1712
1785
                            path_handled = -1
1713
 
                            if result is self.uninteresting:
1714
 
                                result = None
 
1786
                            if not changed and not self.include_unchanged:
 
1787
                                changed = None
1715
1788
                # >- loop control starts here:
1716
1789
                # >- entry
1717
1790
                if advance_entry and current_entry is not None:
1733
1806
                            except UnicodeDecodeError:
1734
1807
                                raise errors.BadFilenameEncoding(
1735
1808
                                    current_path_info[0], osutils._fs_enc)
1736
 
                            if result is not None:
 
1809
                            if changed is not None:
1737
1810
                                raise AssertionError(
1738
1811
                                    "result is not None: %r" % result)
1739
1812
                            result = (None,
1744
1817
                                (None, self.utf8_decode(current_path_info[1])[0]),
1745
1818
                                (None, current_path_info[2]),
1746
1819
                                (None, new_executable))
 
1820
                            changed = True
1747
1821
                        # dont descend into this unversioned path if it is
1748
1822
                        # a dir
1749
1823
                        if current_path_info[2] in ('directory'):
1762
1836
                                current_path_info)
1763
1837
                    else:
1764
1838
                        current_path_info = None
1765
 
                if result is not None:
 
1839
                if changed is not None:
1766
1840
                    # Found a result on this pass, yield it
1767
 
                    return result
 
1841
                    if changed:
 
1842
                        self._gather_result_for_consistency(result)
 
1843
                    if changed or self.include_unchanged:
 
1844
                        return result
1768
1845
            if self.current_block is not None:
1769
1846
                self.block_index = self.block_index + 1
1770
1847
                self._update_current_block()
1776
1853
                    self.current_dir_list = self.current_dir_info[1]
1777
1854
                except StopIteration:
1778
1855
                    self.current_dir_info = None
 
1856
 
 
1857
    cdef object _next_consistent_entries(self):
 
1858
        """Grabs the next specific file parent case to consider.
 
1859
        
 
1860
        :return: A list of the results, each of which is as for _process_entry.
 
1861
        """
 
1862
        results = []
 
1863
        while self.search_specific_file_parents:
 
1864
            # Process the parent directories for the paths we were iterating.
 
1865
            # Even in extremely large trees this should be modest, so currently
 
1866
            # no attempt is made to optimise.
 
1867
            path_utf8 = self.search_specific_file_parents.pop()
 
1868
            if path_utf8 in self.searched_exact_paths:
 
1869
                # We've examined this path.
 
1870
                continue
 
1871
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
1872
                # We've examined this path.
 
1873
                continue
 
1874
            path_entries = self.state._entries_for_path(path_utf8)
 
1875
            # We need either one or two entries. If the path in
 
1876
            # self.target_index has moved (so the entry in source_index is in
 
1877
            # 'ar') then we need to also look for the entry for this path in
 
1878
            # self.source_index, to output the appropriate delete-or-rename.
 
1879
            selected_entries = []
 
1880
            found_item = False
 
1881
            for candidate_entry in path_entries:
 
1882
                # Find entries present in target at this path:
 
1883
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
1884
                    found_item = True
 
1885
                    selected_entries.append(candidate_entry)
 
1886
                # Find entries present in source at this path:
 
1887
                elif (self.source_index is not None and
 
1888
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
1889
                    found_item = True
 
1890
                    if candidate_entry[1][self.target_index][0] == 'a':
 
1891
                        # Deleted, emit it here.
 
1892
                        selected_entries.append(candidate_entry)
 
1893
                    else:
 
1894
                        # renamed, emit it when we process the directory it
 
1895
                        # ended up at.
 
1896
                        self.search_specific_file_parents.add(
 
1897
                            candidate_entry[1][self.target_index][1])
 
1898
            if not found_item:
 
1899
                raise AssertionError(
 
1900
                    "Missing entry for specific path parent %r, %r" % (
 
1901
                    path_utf8, path_entries))
 
1902
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
1903
            for entry in selected_entries:
 
1904
                if entry[0][2] in self.seen_ids:
 
1905
                    continue
 
1906
                result, changed = self._process_entry(entry, path_info)
 
1907
                if changed is None:
 
1908
                    raise AssertionError(
 
1909
                        "Got entry<->path mismatch for specific path "
 
1910
                        "%r entry %r path_info %r " % (
 
1911
                        path_utf8, entry, path_info))
 
1912
                # Only include changes - we're outside the users requested
 
1913
                # expansion.
 
1914
                if changed:
 
1915
                    self._gather_result_for_consistency(result)
 
1916
                    if (result[6][0] == 'directory' and
 
1917
                        result[6][1] != 'directory'):
 
1918
                        # This stopped being a directory, the old children have
 
1919
                        # to be included.
 
1920
                        if entry[1][self.source_index][0] == 'r':
 
1921
                            # renamed, take the source path
 
1922
                            entry_path_utf8 = entry[1][self.source_index][1]
 
1923
                        else:
 
1924
                            entry_path_utf8 = path_utf8
 
1925
                        initial_key = (entry_path_utf8, '', '')
 
1926
                        block_index, _ = self.state._find_block_index_from_key(
 
1927
                            initial_key)
 
1928
                        if block_index == 0:
 
1929
                            # The children of the root are in block index 1.
 
1930
                            block_index = block_index + 1
 
1931
                        current_block = None
 
1932
                        if block_index < len(self.state._dirblocks):
 
1933
                            current_block = self.state._dirblocks[block_index]
 
1934
                            if not osutils.is_inside(
 
1935
                                entry_path_utf8, current_block[0]):
 
1936
                                # No entries for this directory at all.
 
1937
                                current_block = None
 
1938
                        if current_block is not None:
 
1939
                            for entry in current_block[1]:
 
1940
                                if entry[1][self.source_index][0] in 'ar':
 
1941
                                    # Not in the source tree, so doesn't have to be
 
1942
                                    # included.
 
1943
                                    continue
 
1944
                                # Path of the entry itself.
 
1945
                                self.search_specific_file_parents.add(
 
1946
                                    self.pathjoin(*entry[0][:2]))
 
1947
                if changed or self.include_unchanged:
 
1948
                    results.append((result, changed))
 
1949
            self.searched_exact_paths.add(path_utf8)
 
1950
        return results
 
1951
 
 
1952
    cdef object _path_info(self, utf8_path, unicode_path):
 
1953
        """Generate path_info for unicode_path.
 
1954
 
 
1955
        :return: None if unicode_path does not exist, or a path_info tuple.
 
1956
        """
 
1957
        abspath = self.tree.abspath(unicode_path)
 
1958
        try:
 
1959
            stat = os.lstat(abspath)
 
1960
        except OSError, e:
 
1961
            if e.errno == errno.ENOENT:
 
1962
                # the path does not exist.
 
1963
                return None
 
1964
            else:
 
1965
                raise
 
1966
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
1967
        dir_info = (utf8_path, utf8_basename,
 
1968
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
1969
            abspath)
 
1970
        if dir_info[2] == 'directory':
 
1971
            if self.tree._directory_is_tree_reference(
 
1972
                unicode_path):
 
1973
                self.root_dir_info = self.root_dir_info[:2] + \
 
1974
                    ('tree-reference',) + self.root_dir_info[3:]
 
1975
        return dir_info