~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_pyx.pyx

  • Committer: John Arbash Meinel
  • Date: 2010-08-13 19:08:57 UTC
  • mto: (5050.17.7 2.2)
  • mto: This revision was merged to the branch mainline in revision 5379.
  • Revision ID: john@arbash-meinel.com-20100813190857-mvzwnimrxvm0zimp
Lots of documentation updates.

We had a lot of http links pointing to the old domain. They should
all now be properly updated to the new domain. (The only bazaar-vcs.org
entry left is for pqm, which still seems to reside at the old URL.)

Also removed one 'TODO' doc entry about switching to binary xdelta, since
we basically did just that with groupcompress.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2007, 2008, 2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
28
28
 
29
29
from bzrlib import cache_utf8, errors, osutils
30
30
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import pathjoin, splitpath
 
31
from bzrlib.osutils import parent_directories, pathjoin, splitpath
32
32
 
33
33
 
34
34
# This is the Windows equivalent of ENOTDIR
54
54
cdef extern from *:
55
55
    ctypedef unsigned long size_t
56
56
 
57
 
cdef extern from "_dirstate_helpers_c.h":
 
57
cdef extern from "_dirstate_helpers_pyx.h":
58
58
    ctypedef int intptr_t
59
59
 
60
60
 
119
119
    # void *memrchr(void *s, int c, size_t len)
120
120
 
121
121
 
122
 
cdef void* _my_memrchr(void *s, int c, size_t n):
 
122
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
123
123
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
124
124
    cdef char *pos
125
125
    cdef char *start
156
156
        return None
157
157
    return <char*>found - <char*>_s
158
158
 
 
159
 
159
160
cdef object safe_string_from_size(char *s, Py_ssize_t size):
160
161
    if size < 0:
161
 
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
162
162
        raise AssertionError(
163
 
            'tried to create a string with an invalid size: %d @0x%x'
164
 
            % (size, <int>s))
 
163
            'tried to create a string with an invalid size: %d'
 
164
            % (size))
165
165
    return PyString_FromStringAndSize(s, size)
166
166
 
167
167
 
168
 
cdef int _is_aligned(void *ptr):
 
168
cdef int _is_aligned(void *ptr): # cannot_raise
169
169
    """Is this pointer aligned to an integer size offset?
170
170
 
171
171
    :return: 1 if this pointer is aligned, 0 otherwise.
173
173
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
174
174
 
175
175
 
176
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
 
176
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
177
177
    cdef unsigned char *cur1
178
178
    cdef unsigned char *cur2
179
179
    cdef unsigned char *end1
237
237
    return 0
238
238
 
239
239
 
240
 
def cmp_by_dirs_c(path1, path2):
 
240
def cmp_by_dirs(path1, path2):
241
241
    """Compare two paths directory by directory.
242
242
 
243
243
    This is equivalent to doing::
266
266
                        PyString_Size(path2))
267
267
 
268
268
 
269
 
def _cmp_path_by_dirblock_c(path1, path2):
 
269
def _cmp_path_by_dirblock(path1, path2):
270
270
    """Compare two paths based on what directory they are in.
271
271
 
272
272
    This generates a sort order, such that all children of a directory are
288
288
    if not PyString_CheckExact(path2):
289
289
        raise TypeError("'path2' must be a plain string, not %s: %r"
290
290
                        % (type(path2), path2))
291
 
    return _cmp_path_by_dirblock(PyString_AsString(path1),
292
 
                                 PyString_Size(path1),
293
 
                                 PyString_AsString(path2),
294
 
                                 PyString_Size(path2))
295
 
 
296
 
 
297
 
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
 
                               char *path2, int path2_len):
 
291
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
 
292
                                        PyString_Size(path1),
 
293
                                        PyString_AsString(path2),
 
294
                                        PyString_Size(path2))
 
295
 
 
296
 
 
297
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
 
298
                                      char *path2, int path2_len): # cannot_raise
299
299
    """Compare two paths by what directory they are in.
300
300
 
301
 
    see ``_cmp_path_by_dirblock_c`` for details.
 
301
    see ``_cmp_path_by_dirblock`` for details.
302
302
    """
303
303
    cdef char *dirname1
304
304
    cdef int dirname1_len
368
368
    return 1
369
369
 
370
370
 
371
 
def _bisect_path_left_c(paths, path):
 
371
def _bisect_path_left(paths, path):
372
372
    """Return the index where to insert path into paths.
373
373
 
374
374
    This uses a path-wise comparison so we get::
413
413
        cur = PyList_GetItem_object_void(paths, _mid)
414
414
        cur_cstr = PyString_AS_STRING_void(cur)
415
415
        cur_size = PyString_GET_SIZE_void(cur)
416
 
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
 
416
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
 
417
                                        path_cstr, path_size) < 0:
417
418
            _lo = _mid + 1
418
419
        else:
419
420
            _hi = _mid
420
421
    return _lo
421
422
 
422
423
 
423
 
def _bisect_path_right_c(paths, path):
 
424
def _bisect_path_right(paths, path):
424
425
    """Return the index where to insert path into paths.
425
426
 
426
427
    This uses a path-wise comparison so we get::
465
466
        cur = PyList_GetItem_object_void(paths, _mid)
466
467
        cur_cstr = PyString_AS_STRING_void(cur)
467
468
        cur_size = PyString_GET_SIZE_void(cur)
468
 
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
 
469
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
 
470
                                        cur_cstr, cur_size) < 0:
469
471
            _hi = _mid
470
472
        else:
471
473
            _lo = _mid + 1
472
474
    return _lo
473
475
 
474
476
 
475
 
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
 
477
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
476
478
    """Return the index where to insert dirname into the dirblocks.
477
479
 
478
480
    The return value idx is such that all directory blocks in dirblock[:idx]
744
746
        self.state._split_root_dirblock_into_contents()
745
747
 
746
748
 
747
 
def _read_dirblocks_c(state):
 
749
def _read_dirblocks(state):
748
750
    """Read in the dirblocks for the given DirState object.
749
751
 
750
752
    This is tightly bound to the DirState internal representation. It should be
766
768
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
767
769
 
768
770
 
769
 
cdef int minikind_from_mode(int mode):
 
771
cdef int minikind_from_mode(int mode): # cannot_raise
770
772
    # in order of frequency:
771
773
    if S_ISREG(mode):
772
774
        return c"f"
913
915
    return link_or_sha1
914
916
 
915
917
 
916
 
cdef char _minikind_from_string(object string):
 
918
# TODO: Do we want to worry about exceptions here?
 
919
cdef char _minikind_from_string(object string) except? -1:
917
920
    """Convert a python string to a char."""
918
921
    return PyString_AsString(string)[0]
919
922
 
951
954
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
952
955
 
953
956
 
954
 
cdef int _versioned_minikind(char minikind):
 
957
cdef int _versioned_minikind(char minikind): # cannot_raise
955
958
    """Return non-zero if minikind is in fltd"""
956
959
    return (minikind == c'f' or
957
960
            minikind == c'd' or
961
964
 
962
965
cdef class ProcessEntryC:
963
966
 
 
967
    cdef int doing_consistency_expansion
964
968
    cdef object old_dirname_to_file_id # dict
965
969
    cdef object new_dirname_to_file_id # dict
966
 
    cdef readonly object uninteresting
967
970
    cdef object last_source_parent
968
971
    cdef object last_target_parent
969
 
    cdef object include_unchanged
 
972
    cdef int include_unchanged
 
973
    cdef int partial
970
974
    cdef object use_filesystem_for_exec
971
975
    cdef object utf8_decode
972
976
    cdef readonly object searched_specific_files
 
977
    cdef readonly object searched_exact_paths
973
978
    cdef object search_specific_files
 
979
    # The parents up to the root of the paths we are searching.
 
980
    # After all normal paths are returned, these specific items are returned.
 
981
    cdef object search_specific_file_parents
974
982
    cdef object state
975
983
    # Current iteration variables:
976
984
    cdef object current_root
988
996
    cdef object current_block_list
989
997
    cdef object current_dir_info
990
998
    cdef object current_dir_list
 
999
    cdef object _pending_consistent_entries # list
991
1000
    cdef int path_index
992
1001
    cdef object root_dir_info
993
1002
    cdef object bisect_left
994
1003
    cdef object pathjoin
995
1004
    cdef object fstat
 
1005
    # A set of the ids we've output when doing partial output.
 
1006
    cdef object seen_ids
996
1007
    cdef object sha_file
997
1008
 
998
1009
    def __init__(self, include_unchanged, use_filesystem_for_exec,
999
1010
        search_specific_files, state, source_index, target_index,
1000
1011
        want_unversioned, tree):
 
1012
        self.doing_consistency_expansion = 0
1001
1013
        self.old_dirname_to_file_id = {}
1002
1014
        self.new_dirname_to_file_id = {}
1003
 
        # Just a sentry, so that _process_entry can say that this
1004
 
        # record is handled, but isn't interesting to process (unchanged)
1005
 
        self.uninteresting = object()
 
1015
        # Are we doing a partial iter_changes?
 
1016
        self.partial = set(['']).__ne__(search_specific_files)
1006
1017
        # Using a list so that we can access the values and change them in
1007
1018
        # nested scope. Each one is [path, file_id, entry]
1008
1019
        self.last_source_parent = [None, None]
1009
1020
        self.last_target_parent = [None, None]
1010
 
        self.include_unchanged = include_unchanged
 
1021
        if include_unchanged is None:
 
1022
            self.include_unchanged = False
 
1023
        else:
 
1024
            self.include_unchanged = int(include_unchanged)
1011
1025
        self.use_filesystem_for_exec = use_filesystem_for_exec
1012
1026
        self.utf8_decode = cache_utf8._utf8_decode
1013
1027
        # for all search_indexs in each path at or under each element of
1014
 
        # search_specific_files, if the detail is relocated: add the id, and add the
1015
 
        # relocated path as one to search if its not searched already. If the
1016
 
        # detail is not relocated, add the id.
 
1028
        # search_specific_files, if the detail is relocated: add the id, and
 
1029
        # add the relocated path as one to search if its not searched already.
 
1030
        # If the detail is not relocated, add the id.
1017
1031
        self.searched_specific_files = set()
 
1032
        # When we search exact paths without expanding downwards, we record
 
1033
        # that here.
 
1034
        self.searched_exact_paths = set()
1018
1035
        self.search_specific_files = search_specific_files
 
1036
        # The parents up to the root of the paths we are searching.
 
1037
        # After all normal paths are returned, these specific items are returned.
 
1038
        self.search_specific_file_parents = set()
 
1039
        # The ids we've sent out in the delta.
 
1040
        self.seen_ids = set()
1019
1041
        self.state = state
1020
1042
        self.current_root = None
1021
1043
        self.current_root_unicode = None
1037
1059
        self.current_block_pos = -1
1038
1060
        self.current_dir_info = None
1039
1061
        self.current_dir_list = None
 
1062
        self._pending_consistent_entries = []
1040
1063
        self.path_index = 0
1041
1064
        self.root_dir_info = None
1042
1065
        self.bisect_left = bisect.bisect_left
1043
1066
        self.pathjoin = osutils.pathjoin
1044
1067
        self.fstat = os.fstat
1045
1068
        self.sha_file = osutils.sha_file
 
1069
        if target_index != 0:
 
1070
            # A lot of code in here depends on target_index == 0
 
1071
            raise errors.BzrError('unsupported target index')
1046
1072
 
1047
1073
    cdef _process_entry(self, entry, path_info):
1048
1074
        """Compare an entry and real disk to generate delta information.
1049
1075
 
1050
1076
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1051
 
            the path of entry. If None, then the path is considered absent.
1052
 
            (Perhaps we should pass in a concrete entry for this ?)
 
1077
            the path of entry. If None, then the path is considered absent in 
 
1078
            the target (Perhaps we should pass in a concrete entry for this ?)
1053
1079
            Basename is returned as a utf8 string because we expect this
1054
1080
            tuple will be ignored, and don't want to take the time to
1055
1081
            decode.
1056
 
        :return: None if the these don't match
1057
 
                 A tuple of information about the change, or
1058
 
                 the object 'uninteresting' if these match, but are
1059
 
                 basically identical.
 
1082
        :return: (iter_changes_result, changed). If the entry has not been
 
1083
            handled then changed is None. Otherwise it is False if no content
 
1084
            or metadata changes have occurred, and True if any content or
 
1085
            metadata change has occurred. If self.include_unchanged is True then
 
1086
            if changed is not None, iter_changes_result will always be a result
 
1087
            tuple. Otherwise, iter_changes_result is None unless changed is
 
1088
            True.
1060
1089
        """
1061
1090
        cdef char target_minikind
1062
1091
        cdef char source_minikind
1098
1127
            else:
1099
1128
                # add the source to the search path to find any children it
1100
1129
                # has.  TODO ? : only add if it is a container ?
1101
 
                if not osutils.is_inside_any(self.searched_specific_files,
1102
 
                                             source_details[1]):
 
1130
                if (not self.doing_consistency_expansion and 
 
1131
                    not osutils.is_inside_any(self.searched_specific_files,
 
1132
                                             source_details[1])):
1103
1133
                    self.search_specific_files.add(source_details[1])
 
1134
                    # expanding from a user requested path, parent expansion
 
1135
                    # for delta consistency happens later.
1104
1136
                # generate the old path; this is needed for stating later
1105
1137
                # as well.
1106
1138
                old_path = source_details[1]
1171
1203
                        content_change = 0
1172
1204
                    target_exec = False
1173
1205
                else:
1174
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
1206
                    if path is None:
 
1207
                        path = self.pathjoin(old_dirname, old_basename)
 
1208
                    raise errors.BadFileKindError(path, path_info[2])
1175
1209
            if source_minikind == c'd':
1176
1210
                if path is None:
1177
1211
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1179
1213
                    file_id = entry[0][2]
1180
1214
                self.old_dirname_to_file_id[old_path] = file_id
1181
1215
            # parent id is the entry for the path in the target tree
1182
 
            if old_dirname == self.last_source_parent[0]:
 
1216
            if old_basename and old_dirname == self.last_source_parent[0]:
 
1217
                # use a cached hit for non-root source entries.
1183
1218
                source_parent_id = self.last_source_parent[1]
1184
1219
            else:
1185
1220
                try:
1186
1221
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
1187
 
                except KeyError:
 
1222
                except KeyError, _:
1188
1223
                    source_parent_entry = self.state._get_entry(self.source_index,
1189
1224
                                                           path_utf8=old_dirname)
1190
1225
                    source_parent_id = source_parent_entry[0][2]
1195
1230
                    self.last_source_parent[0] = old_dirname
1196
1231
                    self.last_source_parent[1] = source_parent_id
1197
1232
            new_dirname = entry[0][0]
1198
 
            if new_dirname == self.last_target_parent[0]:
 
1233
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
1234
                # use a cached hit for non-root target entries.
1199
1235
                target_parent_id = self.last_target_parent[1]
1200
1236
            else:
1201
1237
                try:
1202
1238
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
1203
 
                except KeyError:
 
1239
                except KeyError, _:
1204
1240
                    # TODO: We don't always need to do the lookup, because the
1205
1241
                    #       parent entry will be the same as the source entry.
1206
1242
                    target_parent_entry = self.state._get_entry(self.target_index,
1218
1254
                    self.last_target_parent[1] = target_parent_id
1219
1255
 
1220
1256
            source_exec = source_details[3]
1221
 
            if (self.include_unchanged
1222
 
                or content_change
 
1257
            changed = (content_change
1223
1258
                or source_parent_id != target_parent_id
1224
1259
                or old_basename != entry[0][1]
1225
1260
                or source_exec != target_exec
1226
 
                ):
 
1261
                )
 
1262
            if not changed and not self.include_unchanged:
 
1263
                return None, False
 
1264
            else:
1227
1265
                if old_path is None:
1228
1266
                    path = self.pathjoin(old_dirname, old_basename)
1229
1267
                    old_path = path
1243
1281
                       (source_parent_id, target_parent_id),
1244
1282
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1245
1283
                       (source_kind, target_kind),
1246
 
                       (source_exec, target_exec))
1247
 
            else:
1248
 
                return self.uninteresting
 
1284
                       (source_exec, target_exec)), changed
1249
1285
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1250
1286
            # looks like a new file
1251
1287
            path = self.pathjoin(entry[0][0], entry[0][1])
1278
1314
                       (None, parent_id),
1279
1315
                       (None, self.utf8_decode(entry[0][1])[0]),
1280
1316
                       (None, path_info[2]),
1281
 
                       (None, target_exec))
 
1317
                       (None, target_exec)), True
1282
1318
            else:
1283
1319
                # It's a missing file, report it as such.
1284
1320
                return (entry[0][2],
1288
1324
                       (None, parent_id),
1289
1325
                       (None, self.utf8_decode(entry[0][1])[0]),
1290
1326
                       (None, None),
1291
 
                       (None, False))
 
1327
                       (None, False)), True
1292
1328
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1293
1329
            # unversioned, possibly, or possibly not deleted: we don't care.
1294
1330
            # if it's still on disk, *and* there's no other entry at this
1306
1342
                   (parent_id, None),
1307
1343
                   (self.utf8_decode(entry[0][1])[0], None),
1308
1344
                   (_minikind_to_kind(source_minikind), None),
1309
 
                   (source_details[3], None))
 
1345
                   (source_details[3], None)), True
1310
1346
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1311
1347
            # a rename; could be a true rename, or a rename inherited from
1312
1348
            # a renamed parent. TODO: handle this efficiently. Its not
1313
1349
            # common case to rename dirs though, so a correct but slow
1314
1350
            # implementation will do.
1315
 
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
1351
            if (not self.doing_consistency_expansion and 
 
1352
                not osutils.is_inside_any(self.searched_specific_files,
 
1353
                    target_details[1])):
1316
1354
                self.search_specific_files.add(target_details[1])
 
1355
                # We don't expand the specific files parents list here as
 
1356
                # the path is absent in target and won't create a delta with
 
1357
                # missing parent.
1317
1358
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1318
1359
              (target_minikind == c'r' or target_minikind == c'a')):
1319
1360
            # neither of the selected trees contain this path,
1325
1366
                "source_minikind=%r, target_minikind=%r"
1326
1367
                % (source_minikind, target_minikind))
1327
1368
            ## import pdb;pdb.set_trace()
1328
 
        return None
 
1369
        return None, None
1329
1370
 
1330
1371
    def __iter__(self):
1331
1372
        return self
1333
1374
    def iter_changes(self):
1334
1375
        return self
1335
1376
 
1336
 
    cdef void _update_current_block(self):
 
1377
    cdef int _gather_result_for_consistency(self, result) except -1:
 
1378
        """Check a result we will yield to make sure we are consistent later.
 
1379
        
 
1380
        This gathers result's parents into a set to output later.
 
1381
 
 
1382
        :param result: A result tuple.
 
1383
        """
 
1384
        if not self.partial or not result[0]:
 
1385
            return 0
 
1386
        self.seen_ids.add(result[0])
 
1387
        new_path = result[1][1]
 
1388
        if new_path:
 
1389
            # Not the root and not a delete: queue up the parents of the path.
 
1390
            self.search_specific_file_parents.update(
 
1391
                osutils.parent_directories(new_path.encode('utf8')))
 
1392
            # Add the root directory which parent_directories does not
 
1393
            # provide.
 
1394
            self.search_specific_file_parents.add('')
 
1395
        return 0
 
1396
 
 
1397
    cdef int _update_current_block(self) except -1:
1337
1398
        if (self.block_index < len(self.state._dirblocks) and
1338
1399
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1339
1400
            self.current_block = self.state._dirblocks[self.block_index]
1342
1403
        else:
1343
1404
            self.current_block = None
1344
1405
            self.current_block_list = None
 
1406
        return 0
1345
1407
 
1346
1408
    def __next__(self):
1347
1409
        # Simple thunk to allow tail recursion without pyrex confusion
1399
1461
        cdef char * current_dirname_c, * current_blockname_c
1400
1462
        cdef int advance_entry, advance_path
1401
1463
        cdef int path_handled
1402
 
        uninteresting = self.uninteresting
1403
1464
        searched_specific_files = self.searched_specific_files
1404
1465
        # Are we walking a root?
1405
1466
        while self.root_entries_pos < self.root_entries_len:
1406
1467
            entry = self.root_entries[self.root_entries_pos]
1407
1468
            self.root_entries_pos = self.root_entries_pos + 1
1408
 
            result = self._process_entry(entry, self.root_dir_info)
1409
 
            if result is not None and result is not self.uninteresting:
1410
 
                return result
 
1469
            result, changed = self._process_entry(entry, self.root_dir_info)
 
1470
            if changed is not None:
 
1471
                if changed:
 
1472
                    self._gather_result_for_consistency(result)
 
1473
                if changed or self.include_unchanged:
 
1474
                    return result
1411
1475
        # Have we finished the prior root, or never started one ?
1412
1476
        if self.current_root is None:
1413
1477
            # TODO: the pending list should be lexically sorted?  the
1414
1478
            # interface doesn't require it.
1415
1479
            try:
1416
1480
                self.current_root = self.search_specific_files.pop()
1417
 
            except KeyError:
 
1481
            except KeyError, _:
1418
1482
                raise StopIteration()
1419
 
            self.current_root_unicode = self.current_root.decode('utf8')
1420
1483
            self.searched_specific_files.add(self.current_root)
1421
1484
            # process the entries for this containing directory: the rest will be
1422
1485
            # found by their parents recursively.
1423
1486
            self.root_entries = self.state._entries_for_path(self.current_root)
1424
1487
            self.root_entries_len = len(self.root_entries)
 
1488
            self.current_root_unicode = self.current_root.decode('utf8')
1425
1489
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1426
1490
            try:
1427
1491
                root_stat = os.lstat(self.root_abspath)
1455
1519
            while self.root_entries_pos < self.root_entries_len:
1456
1520
                entry = self.root_entries[self.root_entries_pos]
1457
1521
                self.root_entries_pos = self.root_entries_pos + 1
1458
 
                result = self._process_entry(entry, self.root_dir_info)
1459
 
                if result is not None:
 
1522
                result, changed = self._process_entry(entry, self.root_dir_info)
 
1523
                if changed is not None:
1460
1524
                    path_handled = -1
1461
 
                    if result is not self.uninteresting:
 
1525
                    if changed:
 
1526
                        self._gather_result_for_consistency(result)
 
1527
                    if changed or self.include_unchanged:
1462
1528
                        return result
1463
1529
            # handle unversioned specified paths:
1464
1530
            if self.want_unversioned and not path_handled and self.root_dir_info:
1476
1542
                      )
1477
1543
            # If we reach here, the outer flow continues, which enters into the
1478
1544
            # per-root setup logic.
1479
 
        if self.current_dir_info is None and self.current_block is None:
 
1545
        if (self.current_dir_info is None and self.current_block is None and not
 
1546
            self.doing_consistency_expansion):
1480
1547
            # setup iteration of this root:
1481
1548
            self.current_dir_list = None
1482
1549
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1500
1567
                        #            and e.winerror == ERROR_DIRECTORY
1501
1568
                        try:
1502
1569
                            e_winerror = e.winerror
1503
 
                        except AttributeError:
 
1570
                        except AttributeError, _:
1504
1571
                            e_winerror = None
1505
1572
                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
1506
1573
                        if (e.errno in win_errors or e_winerror in win_errors):
1589
1656
                    try:
1590
1657
                        self.current_dir_info = self.dir_iterator.next()
1591
1658
                        self.current_dir_list = self.current_dir_info[1]
1592
 
                    except StopIteration:
 
1659
                    except StopIteration, _:
1593
1660
                        self.current_dir_info = None
1594
1661
                else: #(dircmp > 0)
1595
1662
                    # We have a dirblock entry for this location, but there
1604
1671
                        self.current_block_pos = self.current_block_pos + 1
1605
1672
                        # entry referring to file not present on disk.
1606
1673
                        # advance the entry only, after processing.
1607
 
                        result = self._process_entry(current_entry, None)
1608
 
                        if result is not None:
1609
 
                            if result is not self.uninteresting:
 
1674
                        result, changed = self._process_entry(current_entry, None)
 
1675
                        if changed is not None:
 
1676
                            if changed:
 
1677
                                self._gather_result_for_consistency(result)
 
1678
                            if changed or self.include_unchanged:
1610
1679
                                return result
1611
1680
                    self.block_index = self.block_index + 1
1612
1681
                    self._update_current_block()
1618
1687
            # More supplied paths to process
1619
1688
            self.current_root = None
1620
1689
            return self._iter_next()
 
1690
        # Start expanding more conservatively, adding paths the user may not
 
1691
        # have intended but required for consistent deltas.
 
1692
        self.doing_consistency_expansion = 1
 
1693
        if not self._pending_consistent_entries:
 
1694
            self._pending_consistent_entries = self._next_consistent_entries()
 
1695
        while self._pending_consistent_entries:
 
1696
            result, changed = self._pending_consistent_entries.pop()
 
1697
            if changed is not None:
 
1698
                return result
1621
1699
        raise StopIteration()
1622
1700
 
1623
1701
    cdef object _maybe_tree_ref(self, current_path_info):
1673
1751
                    pass
1674
1752
                elif current_path_info is None:
1675
1753
                    # no path is fine: the per entry code will handle it.
1676
 
                    result = self._process_entry(current_entry, current_path_info)
1677
 
                    if result is not None:
1678
 
                        if result is self.uninteresting:
1679
 
                            result = None
 
1754
                    result, changed = self._process_entry(current_entry,
 
1755
                        current_path_info)
1680
1756
                else:
1681
1757
                    minikind = _minikind_from_string(
1682
1758
                        current_entry[1][self.target_index][0])
1697
1773
                        else:
1698
1774
                            # entry referring to file not present on disk.
1699
1775
                            # advance the entry only, after processing.
1700
 
                            result = self._process_entry(current_entry, None)
1701
 
                            if result is not None:
1702
 
                                if result is self.uninteresting:
1703
 
                                    result = None
 
1776
                            result, changed = self._process_entry(current_entry,
 
1777
                                None)
1704
1778
                            advance_path = 0
1705
1779
                    else:
1706
1780
                        # paths are the same,and the dirstate entry is not
1707
1781
                        # absent or renamed.
1708
 
                        result = self._process_entry(current_entry, current_path_info)
1709
 
                        if result is not None:
 
1782
                        result, changed = self._process_entry(current_entry,
 
1783
                            current_path_info)
 
1784
                        if changed is not None:
1710
1785
                            path_handled = -1
1711
 
                            if result is self.uninteresting:
1712
 
                                result = None
 
1786
                            if not changed and not self.include_unchanged:
 
1787
                                changed = None
1713
1788
                # >- loop control starts here:
1714
1789
                # >- entry
1715
1790
                if advance_entry and current_entry is not None:
1728
1803
                                and stat.S_IEXEC & current_path_info[3].st_mode)
1729
1804
                            try:
1730
1805
                                relpath_unicode = self.utf8_decode(current_path_info[0])[0]
1731
 
                            except UnicodeDecodeError:
 
1806
                            except UnicodeDecodeError, _:
1732
1807
                                raise errors.BadFilenameEncoding(
1733
1808
                                    current_path_info[0], osutils._fs_enc)
1734
 
                            if result is not None:
 
1809
                            if changed is not None:
1735
1810
                                raise AssertionError(
1736
1811
                                    "result is not None: %r" % result)
1737
1812
                            result = (None,
1742
1817
                                (None, self.utf8_decode(current_path_info[1])[0]),
1743
1818
                                (None, current_path_info[2]),
1744
1819
                                (None, new_executable))
 
1820
                            changed = True
1745
1821
                        # dont descend into this unversioned path if it is
1746
1822
                        # a dir
1747
1823
                        if current_path_info[2] in ('directory'):
1760
1836
                                current_path_info)
1761
1837
                    else:
1762
1838
                        current_path_info = None
1763
 
                if result is not None:
 
1839
                if changed is not None:
1764
1840
                    # Found a result on this pass, yield it
1765
 
                    return result
 
1841
                    if changed:
 
1842
                        self._gather_result_for_consistency(result)
 
1843
                    if changed or self.include_unchanged:
 
1844
                        return result
1766
1845
            if self.current_block is not None:
1767
1846
                self.block_index = self.block_index + 1
1768
1847
                self._update_current_block()
1772
1851
                try:
1773
1852
                    self.current_dir_info = self.dir_iterator.next()
1774
1853
                    self.current_dir_list = self.current_dir_info[1]
1775
 
                except StopIteration:
 
1854
                except StopIteration, _:
1776
1855
                    self.current_dir_info = None
 
1856
 
 
1857
    cdef object _next_consistent_entries(self):
 
1858
        """Grabs the next specific file parent case to consider.
 
1859
        
 
1860
        :return: A list of the results, each of which is as for _process_entry.
 
1861
        """
 
1862
        results = []
 
1863
        while self.search_specific_file_parents:
 
1864
            # Process the parent directories for the paths we were iterating.
 
1865
            # Even in extremely large trees this should be modest, so currently
 
1866
            # no attempt is made to optimise.
 
1867
            path_utf8 = self.search_specific_file_parents.pop()
 
1868
            if path_utf8 in self.searched_exact_paths:
 
1869
                # We've examined this path.
 
1870
                continue
 
1871
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
1872
                # We've examined this path.
 
1873
                continue
 
1874
            path_entries = self.state._entries_for_path(path_utf8)
 
1875
            # We need either one or two entries. If the path in
 
1876
            # self.target_index has moved (so the entry in source_index is in
 
1877
            # 'ar') then we need to also look for the entry for this path in
 
1878
            # self.source_index, to output the appropriate delete-or-rename.
 
1879
            selected_entries = []
 
1880
            found_item = False
 
1881
            for candidate_entry in path_entries:
 
1882
                # Find entries present in target at this path:
 
1883
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
1884
                    found_item = True
 
1885
                    selected_entries.append(candidate_entry)
 
1886
                # Find entries present in source at this path:
 
1887
                elif (self.source_index is not None and
 
1888
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
1889
                    found_item = True
 
1890
                    if candidate_entry[1][self.target_index][0] == 'a':
 
1891
                        # Deleted, emit it here.
 
1892
                        selected_entries.append(candidate_entry)
 
1893
                    else:
 
1894
                        # renamed, emit it when we process the directory it
 
1895
                        # ended up at.
 
1896
                        self.search_specific_file_parents.add(
 
1897
                            candidate_entry[1][self.target_index][1])
 
1898
            if not found_item:
 
1899
                raise AssertionError(
 
1900
                    "Missing entry for specific path parent %r, %r" % (
 
1901
                    path_utf8, path_entries))
 
1902
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
1903
            for entry in selected_entries:
 
1904
                if entry[0][2] in self.seen_ids:
 
1905
                    continue
 
1906
                result, changed = self._process_entry(entry, path_info)
 
1907
                if changed is None:
 
1908
                    raise AssertionError(
 
1909
                        "Got entry<->path mismatch for specific path "
 
1910
                        "%r entry %r path_info %r " % (
 
1911
                        path_utf8, entry, path_info))
 
1912
                # Only include changes - we're outside the users requested
 
1913
                # expansion.
 
1914
                if changed:
 
1915
                    self._gather_result_for_consistency(result)
 
1916
                    if (result[6][0] == 'directory' and
 
1917
                        result[6][1] != 'directory'):
 
1918
                        # This stopped being a directory, the old children have
 
1919
                        # to be included.
 
1920
                        if entry[1][self.source_index][0] == 'r':
 
1921
                            # renamed, take the source path
 
1922
                            entry_path_utf8 = entry[1][self.source_index][1]
 
1923
                        else:
 
1924
                            entry_path_utf8 = path_utf8
 
1925
                        initial_key = (entry_path_utf8, '', '')
 
1926
                        block_index, _ = self.state._find_block_index_from_key(
 
1927
                            initial_key)
 
1928
                        if block_index == 0:
 
1929
                            # The children of the root are in block index 1.
 
1930
                            block_index = block_index + 1
 
1931
                        current_block = None
 
1932
                        if block_index < len(self.state._dirblocks):
 
1933
                            current_block = self.state._dirblocks[block_index]
 
1934
                            if not osutils.is_inside(
 
1935
                                entry_path_utf8, current_block[0]):
 
1936
                                # No entries for this directory at all.
 
1937
                                current_block = None
 
1938
                        if current_block is not None:
 
1939
                            for entry in current_block[1]:
 
1940
                                if entry[1][self.source_index][0] in 'ar':
 
1941
                                    # Not in the source tree, so doesn't have to be
 
1942
                                    # included.
 
1943
                                    continue
 
1944
                                # Path of the entry itself.
 
1945
                                self.search_specific_file_parents.add(
 
1946
                                    self.pathjoin(*entry[0][:2]))
 
1947
                if changed or self.include_unchanged:
 
1948
                    results.append((result, changed))
 
1949
            self.searched_exact_paths.add(path_utf8)
 
1950
        return results
 
1951
 
 
1952
    cdef object _path_info(self, utf8_path, unicode_path):
 
1953
        """Generate path_info for unicode_path.
 
1954
 
 
1955
        :return: None if unicode_path does not exist, or a path_info tuple.
 
1956
        """
 
1957
        abspath = self.tree.abspath(unicode_path)
 
1958
        try:
 
1959
            stat = os.lstat(abspath)
 
1960
        except OSError, e:
 
1961
            if e.errno == errno.ENOENT:
 
1962
                # the path does not exist.
 
1963
                return None
 
1964
            else:
 
1965
                raise
 
1966
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
1967
        dir_info = (utf8_path, utf8_basename,
 
1968
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
1969
            abspath)
 
1970
        if dir_info[2] == 'directory':
 
1971
            if self.tree._directory_is_tree_reference(
 
1972
                unicode_path):
 
1973
                self.root_dir_info = self.root_dir_info[:2] + \
 
1974
                    ('tree-reference',) + self.root_dir_info[3:]
 
1975
        return dir_info