~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_pyx.pyx

(jameinel) Allow 'bzr serve' to interpret SIGHUP as a graceful shutdown.
 (bug #795025) (John A Meinel)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2007-2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
"""Helper functions for DirState.
18
18
 
28
28
 
29
29
from bzrlib import cache_utf8, errors, osutils
30
30
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import pathjoin, splitpath
 
31
from bzrlib.osutils import parent_directories, pathjoin, splitpath
32
32
 
33
33
 
34
34
# This is the Windows equivalent of ENOTDIR
54
54
cdef extern from *:
55
55
    ctypedef unsigned long size_t
56
56
 
57
 
cdef extern from "_dirstate_helpers_c.h":
 
57
cdef extern from "_dirstate_helpers_pyx.h":
58
58
    ctypedef int intptr_t
59
59
 
60
60
 
118
118
    # ??? memrchr is a GNU extension :(
119
119
    # void *memrchr(void *s, int c, size_t len)
120
120
 
121
 
 
122
 
cdef void* _my_memrchr(void *s, int c, size_t n):
 
121
# cimport all of the definitions we will need to access
 
122
from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \
 
123
    StaticTuple_New, StaticTuple_SET_ITEM
 
124
 
 
125
import_static_tuple_c()
 
126
 
 
127
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
123
128
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
124
129
    cdef char *pos
125
130
    cdef char *start
156
161
        return None
157
162
    return <char*>found - <char*>_s
158
163
 
 
164
 
159
165
cdef object safe_string_from_size(char *s, Py_ssize_t size):
160
166
    if size < 0:
161
 
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
162
167
        raise AssertionError(
163
 
            'tried to create a string with an invalid size: %d @0x%x'
164
 
            % (size, <int>s))
 
168
            'tried to create a string with an invalid size: %d'
 
169
            % (size))
165
170
    return PyString_FromStringAndSize(s, size)
166
171
 
167
172
 
168
 
cdef int _is_aligned(void *ptr):
 
173
cdef int _is_aligned(void *ptr): # cannot_raise
169
174
    """Is this pointer aligned to an integer size offset?
170
175
 
171
176
    :return: 1 if this pointer is aligned, 0 otherwise.
173
178
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
174
179
 
175
180
 
176
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
 
181
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
177
182
    cdef unsigned char *cur1
178
183
    cdef unsigned char *cur2
179
184
    cdef unsigned char *end1
237
242
    return 0
238
243
 
239
244
 
240
 
def cmp_by_dirs_c(path1, path2):
 
245
def cmp_by_dirs(path1, path2):
241
246
    """Compare two paths directory by directory.
242
247
 
243
248
    This is equivalent to doing::
266
271
                        PyString_Size(path2))
267
272
 
268
273
 
269
 
def _cmp_path_by_dirblock_c(path1, path2):
 
274
def _cmp_path_by_dirblock(path1, path2):
270
275
    """Compare two paths based on what directory they are in.
271
276
 
272
277
    This generates a sort order, such that all children of a directory are
288
293
    if not PyString_CheckExact(path2):
289
294
        raise TypeError("'path2' must be a plain string, not %s: %r"
290
295
                        % (type(path2), path2))
291
 
    return _cmp_path_by_dirblock(PyString_AsString(path1),
292
 
                                 PyString_Size(path1),
293
 
                                 PyString_AsString(path2),
294
 
                                 PyString_Size(path2))
295
 
 
296
 
 
297
 
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
 
                               char *path2, int path2_len):
 
296
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
 
297
                                        PyString_Size(path1),
 
298
                                        PyString_AsString(path2),
 
299
                                        PyString_Size(path2))
 
300
 
 
301
 
 
302
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
 
303
                                      char *path2, int path2_len): # cannot_raise
299
304
    """Compare two paths by what directory they are in.
300
305
 
301
 
    see ``_cmp_path_by_dirblock_c`` for details.
 
306
    see ``_cmp_path_by_dirblock`` for details.
302
307
    """
303
308
    cdef char *dirname1
304
309
    cdef int dirname1_len
368
373
    return 1
369
374
 
370
375
 
371
 
def _bisect_path_left_c(paths, path):
 
376
def _bisect_path_left(paths, path):
372
377
    """Return the index where to insert path into paths.
373
378
 
374
379
    This uses a path-wise comparison so we get::
413
418
        cur = PyList_GetItem_object_void(paths, _mid)
414
419
        cur_cstr = PyString_AS_STRING_void(cur)
415
420
        cur_size = PyString_GET_SIZE_void(cur)
416
 
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
 
421
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
 
422
                                        path_cstr, path_size) < 0:
417
423
            _lo = _mid + 1
418
424
        else:
419
425
            _hi = _mid
420
426
    return _lo
421
427
 
422
428
 
423
 
def _bisect_path_right_c(paths, path):
 
429
def _bisect_path_right(paths, path):
424
430
    """Return the index where to insert path into paths.
425
431
 
426
432
    This uses a path-wise comparison so we get::
465
471
        cur = PyList_GetItem_object_void(paths, _mid)
466
472
        cur_cstr = PyString_AS_STRING_void(cur)
467
473
        cur_size = PyString_GET_SIZE_void(cur)
468
 
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
 
474
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
 
475
                                        cur_cstr, cur_size) < 0:
469
476
            _hi = _mid
470
477
        else:
471
478
            _lo = _mid + 1
472
479
    return _lo
473
480
 
474
481
 
475
 
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
 
482
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
476
483
    """Return the index where to insert dirname into the dirblocks.
477
484
 
478
485
    The return value idx is such that all directory blocks in dirblocks[:idx]
608
615
        :param new_block: This is to let the caller know that it needs to
609
616
            create a new directory block to store the next entry.
610
617
        """
611
 
        cdef object path_name_file_id_key
 
618
        cdef StaticTuple path_name_file_id_key
 
619
        cdef StaticTuple tmp
612
620
        cdef char *entry_size_cstr
613
621
        cdef unsigned long int entry_size
614
622
        cdef char* executable_cstr
648
656
        # Build up the key that will be used.
649
657
        # By using <object>(void *) Pyrex will automatically handle the
650
658
        # Py_INCREF that we need.
651
 
        path_name_file_id_key = (<object>p_current_dirname[0],
652
 
                                 self.get_next_str(),
653
 
                                 self.get_next_str(),
654
 
                                )
 
659
        cur_dirname = <object>p_current_dirname[0]
 
660
        # Use StaticTuple_New to pre-allocate, rather than creating a regular
 
661
        # tuple and passing it to the StaticTuple constructor.
 
662
        # path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
 
663
        #                          self.get_next_str(),
 
664
        #                          self.get_next_str(),
 
665
        #                         )
 
666
        tmp = StaticTuple_New(3)
 
667
        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
 
668
        cur_basename = self.get_next_str()
 
669
        cur_file_id = self.get_next_str()
 
670
        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
 
671
        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
 
672
        path_name_file_id_key = tmp
655
673
 
656
674
        # Parse all of the per-tree information. current has the information in
657
675
        # the same location as parent trees. The only difference is that 'info'
675
693
            executable_cstr = self.get_next(&cur_size)
676
694
            is_executable = (executable_cstr[0] == c'y')
677
695
            info = self.get_next_str()
678
 
            PyList_Append(trees, (
 
696
            # TODO: If we want to use StaticTuple_New here we need to be pretty
 
697
            #       careful. We are relying on a bit of Pyrex
 
698
            #       automatic-conversion from 'int' to PyInt, and that doesn't
 
699
            #       play well with the StaticTuple_SET_ITEM macro.
 
700
            #       Timing doesn't (yet) show a worthwhile improvement in speed
 
701
            #       versus complexity and maintainability.
 
702
            # tmp = StaticTuple_New(5)
 
703
            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
 
704
            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
 
705
            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
 
706
            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
 
707
            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
 
708
            # PyList_Append(trees, tmp)
 
709
            PyList_Append(trees, StaticTuple(
679
710
                minikind,     # minikind
680
711
                fingerprint,  # fingerprint
681
712
                entry_size,   # size
744
775
        self.state._split_root_dirblock_into_contents()
745
776
 
746
777
 
747
 
def _read_dirblocks_c(state):
 
778
def _read_dirblocks(state):
748
779
    """Read in the dirblocks for the given DirState object.
749
780
 
750
781
    This is tightly bound to the DirState internal representation. It should be
766
797
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
767
798
 
768
799
 
769
 
cdef int minikind_from_mode(int mode):
 
800
cdef int minikind_from_mode(int mode): # cannot_raise
770
801
    # in order of frequency:
771
802
    if S_ISREG(mode):
772
803
        return c"f"
835
866
    # _st mode of the compiled stat objects.
836
867
    cdef int minikind, saved_minikind
837
868
    cdef void * details
 
869
    cdef int worth_saving
838
870
    minikind = minikind_from_mode(stat_value.st_mode)
839
871
    if 0 == minikind:
840
872
        return None
841
873
    packed_stat = _pack_stat(stat_value)
842
874
    details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
843
875
    saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
 
876
    if minikind == c'd' and saved_minikind == c't':
 
877
        minikind = c't'
844
878
    saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
845
879
    saved_file_size = PyTuple_GetItem_void_object(details, 2)
846
880
    saved_executable = PyTuple_GetItem_void_object(details, 3)
867
901
    # If we have gotten this far, that means that we need to actually
868
902
    # process this entry.
869
903
    link_or_sha1 = None
 
904
    worth_saving = 1
870
905
    if minikind == c'f':
871
906
        executable = self._is_executable(stat_value.st_mode,
872
907
                                         saved_executable)
883
918
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
884
919
                           executable, packed_stat)
885
920
        else:
886
 
            entry[1][0] = ('f', '', stat_value.st_size,
887
 
                           executable, DirState.NULLSTAT)
 
921
            # This file is not worth caching the sha1. Either it is too new, or
 
922
            # it is newly added. Regardless, the only things we are changing
 
923
            # are derived from the stat, and so are not worth caching. So we do
 
924
            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
 
925
            # updated values if there is *other* data worth saving.)
 
926
            entry[1][0] = ('f', '', stat_value.st_size, executable,
 
927
                           DirState.NULLSTAT)
 
928
            worth_saving = 0
888
929
    elif minikind == c'd':
889
 
        link_or_sha1 = None
890
930
        entry[1][0] = ('d', '', 0, False, packed_stat)
891
931
        if saved_minikind != c'd':
892
932
            # This changed from something into a directory. Make sure we
896
936
                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
897
937
            self._ensure_block(block_index, entry_index,
898
938
                               pathjoin(entry[0][0], entry[0][1]))
 
939
        else:
 
940
            # Any changes are derived trivially from the stat object, not worth
 
941
            # re-writing a dirstate for just this
 
942
            worth_saving = 0
899
943
    elif minikind == c'l':
 
944
        if saved_minikind == c'l':
 
945
            # If the object hasn't changed kind, it isn't worth saving the
 
946
            # dirstate just for a symlink. The default is 'fast symlinks' which
 
947
            # save the target in the inode entry, rather than separately. So to
 
948
            # stat, we've already read everything off disk.
 
949
            worth_saving = 0
900
950
        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
901
951
        if self._cutoff_time is None:
902
952
            self._sha_cutoff_time()
907
957
        else:
908
958
            entry[1][0] = ('l', '', stat_value.st_size,
909
959
                           False, DirState.NULLSTAT)
910
 
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
960
    if worth_saving:
 
961
        # Note, even though _mark_modified will only set
 
962
        # IN_MEMORY_HASH_MODIFIED, it still isn't worth doing when nothing here is worth saving.
 
963
        self._mark_modified([entry])
911
964
    return link_or_sha1
912
965
 
913
966
 
914
 
cdef char _minikind_from_string(object string):
 
967
# TODO: Do we want to worry about exceptions here?
 
968
cdef char _minikind_from_string(object string) except? -1:
915
969
    """Convert a python string to a char."""
916
970
    return PyString_AsString(string)[0]
917
971
 
949
1003
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
950
1004
 
951
1005
 
952
 
cdef int _versioned_minikind(char minikind):
 
1006
cdef int _versioned_minikind(char minikind): # cannot_raise
953
1007
    """Return non-zero if minikind is in fltd"""
954
1008
    return (minikind == c'f' or
955
1009
            minikind == c'd' or
959
1013
 
960
1014
cdef class ProcessEntryC:
961
1015
 
 
1016
    cdef int doing_consistency_expansion
962
1017
    cdef object old_dirname_to_file_id # dict
963
1018
    cdef object new_dirname_to_file_id # dict
964
 
    cdef readonly object uninteresting
965
1019
    cdef object last_source_parent
966
1020
    cdef object last_target_parent
967
 
    cdef object include_unchanged
 
1021
    cdef int include_unchanged
 
1022
    cdef int partial
968
1023
    cdef object use_filesystem_for_exec
969
1024
    cdef object utf8_decode
970
1025
    cdef readonly object searched_specific_files
 
1026
    cdef readonly object searched_exact_paths
971
1027
    cdef object search_specific_files
 
1028
    # The parents up to the root of the paths we are searching.
 
1029
    # After all normal paths are returned, these specific items are returned.
 
1030
    cdef object search_specific_file_parents
972
1031
    cdef object state
973
1032
    # Current iteration variables:
974
1033
    cdef object current_root
986
1045
    cdef object current_block_list
987
1046
    cdef object current_dir_info
988
1047
    cdef object current_dir_list
 
1048
    cdef object _pending_consistent_entries # list
989
1049
    cdef int path_index
990
1050
    cdef object root_dir_info
991
1051
    cdef object bisect_left
992
1052
    cdef object pathjoin
993
1053
    cdef object fstat
 
1054
    # A set of the ids we've output when doing partial output.
 
1055
    cdef object seen_ids
994
1056
    cdef object sha_file
995
1057
 
996
1058
    def __init__(self, include_unchanged, use_filesystem_for_exec,
997
1059
        search_specific_files, state, source_index, target_index,
998
1060
        want_unversioned, tree):
 
1061
        self.doing_consistency_expansion = 0
999
1062
        self.old_dirname_to_file_id = {}
1000
1063
        self.new_dirname_to_file_id = {}
1001
 
        # Just a sentry, so that _process_entry can say that this
1002
 
        # record is handled, but isn't interesting to process (unchanged)
1003
 
        self.uninteresting = object()
 
1064
        # Are we doing a partial iter_changes?
 
1065
        self.partial = set(['']).__ne__(search_specific_files)
1004
1066
        # Using a list so that we can access the values and change them in
1005
1067
        # nested scope. Each one is [path, file_id, entry]
1006
1068
        self.last_source_parent = [None, None]
1007
1069
        self.last_target_parent = [None, None]
1008
 
        self.include_unchanged = include_unchanged
 
1070
        if include_unchanged is None:
 
1071
            self.include_unchanged = False
 
1072
        else:
 
1073
            self.include_unchanged = int(include_unchanged)
1009
1074
        self.use_filesystem_for_exec = use_filesystem_for_exec
1010
1075
        self.utf8_decode = cache_utf8._utf8_decode
1011
1076
        # for all search_indexs in each path at or under each element of
1012
 
        # search_specific_files, if the detail is relocated: add the id, and add the
1013
 
        # relocated path as one to search if its not searched already. If the
1014
 
        # detail is not relocated, add the id.
 
1077
        # search_specific_files, if the detail is relocated: add the id, and
 
1078
        # add the relocated path as one to search if it's not searched already.
 
1079
        # If the detail is not relocated, add the id.
1015
1080
        self.searched_specific_files = set()
 
1081
        # When we search exact paths without expanding downwards, we record
 
1082
        # that here.
 
1083
        self.searched_exact_paths = set()
1016
1084
        self.search_specific_files = search_specific_files
 
1085
        # The parents up to the root of the paths we are searching.
 
1086
        # After all normal paths are returned, these specific items are returned.
 
1087
        self.search_specific_file_parents = set()
 
1088
        # The ids we've sent out in the delta.
 
1089
        self.seen_ids = set()
1017
1090
        self.state = state
1018
1091
        self.current_root = None
1019
1092
        self.current_root_unicode = None
1035
1108
        self.current_block_pos = -1
1036
1109
        self.current_dir_info = None
1037
1110
        self.current_dir_list = None
 
1111
        self._pending_consistent_entries = []
1038
1112
        self.path_index = 0
1039
1113
        self.root_dir_info = None
1040
1114
        self.bisect_left = bisect.bisect_left
1041
1115
        self.pathjoin = osutils.pathjoin
1042
1116
        self.fstat = os.fstat
1043
1117
        self.sha_file = osutils.sha_file
 
1118
        if target_index != 0:
 
1119
            # A lot of code in here depends on target_index == 0
 
1120
            raise errors.BzrError('unsupported target index')
1044
1121
 
1045
1122
    cdef _process_entry(self, entry, path_info):
1046
1123
        """Compare an entry and real disk to generate delta information.
1047
1124
 
1048
1125
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1049
 
            the path of entry. If None, then the path is considered absent.
1050
 
            (Perhaps we should pass in a concrete entry for this ?)
 
1126
            the path of entry. If None, then the path is considered absent in 
 
1127
            the target (Perhaps we should pass in a concrete entry for this ?)
1051
1128
            Basename is returned as a utf8 string because we expect this
1052
1129
            tuple will be ignored, and don't want to take the time to
1053
1130
            decode.
1054
 
        :return: None if the these don't match
1055
 
                 A tuple of information about the change, or
1056
 
                 the object 'uninteresting' if these match, but are
1057
 
                 basically identical.
 
1131
        :return: (iter_changes_result, changed). If the entry has not been
 
1132
            handled then changed is None. Otherwise it is False if no content
 
1133
            or metadata changes have occurred, and True if any content or
 
1134
            metadata change has occurred. If self.include_unchanged is True then
 
1135
            if changed is not None, iter_changes_result will always be a result
 
1136
            tuple. Otherwise, iter_changes_result is None unless changed is
 
1137
            True.
1058
1138
        """
1059
1139
        cdef char target_minikind
1060
1140
        cdef char source_minikind
1096
1176
            else:
1097
1177
                # add the source to the search path to find any children it
1098
1178
                # has.  TODO ? : only add if it is a container ?
1099
 
                if not osutils.is_inside_any(self.searched_specific_files,
1100
 
                                             source_details[1]):
 
1179
                if (not self.doing_consistency_expansion and 
 
1180
                    not osutils.is_inside_any(self.searched_specific_files,
 
1181
                                             source_details[1])):
1101
1182
                    self.search_specific_files.add(source_details[1])
 
1183
                    # expanding from a user requested path, parent expansion
 
1184
                    # for delta consistency happens later.
1102
1185
                # generate the old path; this is needed for stating later
1103
1186
                # as well.
1104
1187
                old_path = source_details[1]
1138
1221
                    if source_minikind != c'f':
1139
1222
                        content_change = 1
1140
1223
                    else:
1141
 
                        # If the size is the same, check the sha:
1142
 
                        if target_details[2] == source_details[2]:
1143
 
                            if link_or_sha1 is None:
1144
 
                                # Stat cache miss:
1145
 
                                file_obj = file(path_info[4], 'rb')
1146
 
                                try:
1147
 
                                    # XXX: TODO: Use lower level file IO rather
1148
 
                                    # than python objects for sha-misses.
1149
 
                                    statvalue = self.fstat(file_obj.fileno())
1150
 
                                    link_or_sha1 = self.sha_file(file_obj)
1151
 
                                finally:
1152
 
                                    file_obj.close()
1153
 
                                self.state._observed_sha1(entry, link_or_sha1,
1154
 
                                    statvalue)
1155
 
                            content_change = (link_or_sha1 != source_details[1])
1156
 
                        else:
1157
 
                            # Size changed, so must be different
1158
 
                            content_change = 1
 
1224
                        # Check the sha. We can't just rely on the size as
 
1225
                        # content filtering may mean different sizes actually
 
1226
                        # map to the same content
 
1227
                        if link_or_sha1 is None:
 
1228
                            # Stat cache miss:
 
1229
                            statvalue, link_or_sha1 = \
 
1230
                                self.state._sha1_provider.stat_and_sha1(
 
1231
                                path_info[4])
 
1232
                            self.state._observed_sha1(entry, link_or_sha1,
 
1233
                                statvalue)
 
1234
                        content_change = (link_or_sha1 != source_details[1])
1159
1235
                    # Target details is updated at update_entry time
1160
1236
                    if self.use_filesystem_for_exec:
1161
1237
                        # We don't need S_ISREG here, because we are sure
1176
1252
                        content_change = 0
1177
1253
                    target_exec = False
1178
1254
                else:
1179
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
1255
                    if path is None:
 
1256
                        path = self.pathjoin(old_dirname, old_basename)
 
1257
                    raise errors.BadFileKindError(path, path_info[2])
1180
1258
            if source_minikind == c'd':
1181
1259
                if path is None:
1182
1260
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1184
1262
                    file_id = entry[0][2]
1185
1263
                self.old_dirname_to_file_id[old_path] = file_id
1186
1264
            # parent id is the entry for the path in the target tree
1187
 
            if old_dirname == self.last_source_parent[0]:
 
1265
            if old_basename and old_dirname == self.last_source_parent[0]:
 
1266
                # use a cached hit for non-root source entries.
1188
1267
                source_parent_id = self.last_source_parent[1]
1189
1268
            else:
1190
1269
                try:
1191
1270
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
1192
 
                except KeyError:
 
1271
                except KeyError, _:
1193
1272
                    source_parent_entry = self.state._get_entry(self.source_index,
1194
1273
                                                           path_utf8=old_dirname)
1195
1274
                    source_parent_id = source_parent_entry[0][2]
1200
1279
                    self.last_source_parent[0] = old_dirname
1201
1280
                    self.last_source_parent[1] = source_parent_id
1202
1281
            new_dirname = entry[0][0]
1203
 
            if new_dirname == self.last_target_parent[0]:
 
1282
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
1283
                # use a cached hit for non-root target entries.
1204
1284
                target_parent_id = self.last_target_parent[1]
1205
1285
            else:
1206
1286
                try:
1207
1287
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
1208
 
                except KeyError:
 
1288
                except KeyError, _:
1209
1289
                    # TODO: We don't always need to do the lookup, because the
1210
1290
                    #       parent entry will be the same as the source entry.
1211
1291
                    target_parent_entry = self.state._get_entry(self.target_index,
1223
1303
                    self.last_target_parent[1] = target_parent_id
1224
1304
 
1225
1305
            source_exec = source_details[3]
1226
 
            if (self.include_unchanged
1227
 
                or content_change
 
1306
            changed = (content_change
1228
1307
                or source_parent_id != target_parent_id
1229
1308
                or old_basename != entry[0][1]
1230
1309
                or source_exec != target_exec
1231
 
                ):
 
1310
                )
 
1311
            if not changed and not self.include_unchanged:
 
1312
                return None, False
 
1313
            else:
1232
1314
                if old_path is None:
1233
1315
                    path = self.pathjoin(old_dirname, old_basename)
1234
1316
                    old_path = path
1248
1330
                       (source_parent_id, target_parent_id),
1249
1331
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1250
1332
                       (source_kind, target_kind),
1251
 
                       (source_exec, target_exec))
1252
 
            else:
1253
 
                return self.uninteresting
 
1333
                       (source_exec, target_exec)), changed
1254
1334
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1255
1335
            # looks like a new file
1256
1336
            path = self.pathjoin(entry[0][0], entry[0][1])
1257
1337
            # parent id is the entry for the path in the target tree
1258
1338
            # TODO: these are the same for an entire directory: cache em.
1259
 
            parent_id = self.state._get_entry(self.target_index,
1260
 
                                         path_utf8=entry[0][0])[0][2]
 
1339
            parent_entry = self.state._get_entry(self.target_index,
 
1340
                                                 path_utf8=entry[0][0])
 
1341
            if parent_entry is None:
 
1342
                raise errors.DirstateCorrupt(self.state,
 
1343
                    "We could not find the parent entry in index %d"
 
1344
                    " for the entry: %s"
 
1345
                    % (self.target_index, entry[0]))
 
1346
            parent_id = parent_entry[0][2]
1261
1347
            if parent_id == entry[0][2]:
1262
1348
                parent_id = None
1263
1349
            if path_info is not None:
1277
1363
                       (None, parent_id),
1278
1364
                       (None, self.utf8_decode(entry[0][1])[0]),
1279
1365
                       (None, path_info[2]),
1280
 
                       (None, target_exec))
 
1366
                       (None, target_exec)), True
1281
1367
            else:
1282
1368
                # It's a missing file, report it as such.
1283
1369
                return (entry[0][2],
1287
1373
                       (None, parent_id),
1288
1374
                       (None, self.utf8_decode(entry[0][1])[0]),
1289
1375
                       (None, None),
1290
 
                       (None, False))
 
1376
                       (None, False)), True
1291
1377
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1292
1378
            # unversioned, possibly, or possibly not deleted: we don't care.
1293
1379
            # if it's still on disk, *and* there's no other entry at this
1305
1391
                   (parent_id, None),
1306
1392
                   (self.utf8_decode(entry[0][1])[0], None),
1307
1393
                   (_minikind_to_kind(source_minikind), None),
1308
 
                   (source_details[3], None))
 
1394
                   (source_details[3], None)), True
1309
1395
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1310
1396
            # a rename; could be a true rename, or a rename inherited from
1311
1397
            # a renamed parent. TODO: handle this efficiently. It's not
1312
1398
            # common case to rename dirs though, so a correct but slow
1313
1399
            # implementation will do.
1314
 
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
1400
            if (not self.doing_consistency_expansion and 
 
1401
                not osutils.is_inside_any(self.searched_specific_files,
 
1402
                    target_details[1])):
1315
1403
                self.search_specific_files.add(target_details[1])
 
1404
                # We don't expand the specific files parents list here as
 
1405
                # the path is absent in target and won't create a delta with
 
1406
                # missing parent.
1316
1407
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1317
1408
              (target_minikind == c'r' or target_minikind == c'a')):
1318
1409
            # neither of the selected trees contain this path,
1324
1415
                "source_minikind=%r, target_minikind=%r"
1325
1416
                % (source_minikind, target_minikind))
1326
1417
            ## import pdb;pdb.set_trace()
1327
 
        return None
 
1418
        return None, None
1328
1419
 
1329
1420
    def __iter__(self):
1330
1421
        return self
1332
1423
    def iter_changes(self):
1333
1424
        return self
1334
1425
 
1335
 
    cdef void _update_current_block(self):
 
1426
    cdef int _gather_result_for_consistency(self, result) except -1:
 
1427
        """Check a result we will yield to make sure we are consistent later.
 
1428
        
 
1429
        This gathers result's parents into a set to output later.
 
1430
 
 
1431
        :param result: A result tuple.
 
1432
        """
 
1433
        if not self.partial or not result[0]:
 
1434
            return 0
 
1435
        self.seen_ids.add(result[0])
 
1436
        new_path = result[1][1]
 
1437
        if new_path:
 
1438
            # Not the root and not a delete: queue up the parents of the path.
 
1439
            self.search_specific_file_parents.update(
 
1440
                osutils.parent_directories(new_path.encode('utf8')))
 
1441
            # Add the root directory which parent_directories does not
 
1442
            # provide.
 
1443
            self.search_specific_file_parents.add('')
 
1444
        return 0
 
1445
 
 
1446
    cdef int _update_current_block(self) except -1:
1336
1447
        if (self.block_index < len(self.state._dirblocks) and
1337
1448
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1338
1449
            self.current_block = self.state._dirblocks[self.block_index]
1341
1452
        else:
1342
1453
            self.current_block = None
1343
1454
            self.current_block_list = None
 
1455
        return 0
1344
1456
 
1345
1457
    def __next__(self):
1346
1458
        # Simple thunk to allow tail recursion without pyrex confusion
1398
1510
        cdef char * current_dirname_c, * current_blockname_c
1399
1511
        cdef int advance_entry, advance_path
1400
1512
        cdef int path_handled
1401
 
        uninteresting = self.uninteresting
1402
1513
        searched_specific_files = self.searched_specific_files
1403
1514
        # Are we walking a root?
1404
1515
        while self.root_entries_pos < self.root_entries_len:
1405
1516
            entry = self.root_entries[self.root_entries_pos]
1406
1517
            self.root_entries_pos = self.root_entries_pos + 1
1407
 
            result = self._process_entry(entry, self.root_dir_info)
1408
 
            if result is not None and result is not self.uninteresting:
1409
 
                return result
 
1518
            result, changed = self._process_entry(entry, self.root_dir_info)
 
1519
            if changed is not None:
 
1520
                if changed:
 
1521
                    self._gather_result_for_consistency(result)
 
1522
                if changed or self.include_unchanged:
 
1523
                    return result
1410
1524
        # Have we finished the prior root, or never started one ?
1411
1525
        if self.current_root is None:
1412
1526
            # TODO: the pending list should be lexically sorted?  the
1413
1527
            # interface doesn't require it.
1414
1528
            try:
1415
1529
                self.current_root = self.search_specific_files.pop()
1416
 
            except KeyError:
 
1530
            except KeyError, _:
1417
1531
                raise StopIteration()
1418
 
            self.current_root_unicode = self.current_root.decode('utf8')
1419
1532
            self.searched_specific_files.add(self.current_root)
1420
1533
            # process the entries for this containing directory: the rest will be
1421
1534
            # found by their parents recursively.
1422
1535
            self.root_entries = self.state._entries_for_path(self.current_root)
1423
1536
            self.root_entries_len = len(self.root_entries)
 
1537
            self.current_root_unicode = self.current_root.decode('utf8')
1424
1538
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1425
1539
            try:
1426
1540
                root_stat = os.lstat(self.root_abspath)
1454
1568
            while self.root_entries_pos < self.root_entries_len:
1455
1569
                entry = self.root_entries[self.root_entries_pos]
1456
1570
                self.root_entries_pos = self.root_entries_pos + 1
1457
 
                result = self._process_entry(entry, self.root_dir_info)
1458
 
                if result is not None:
 
1571
                result, changed = self._process_entry(entry, self.root_dir_info)
 
1572
                if changed is not None:
1459
1573
                    path_handled = -1
1460
 
                    if result is not self.uninteresting:
 
1574
                    if changed:
 
1575
                        self._gather_result_for_consistency(result)
 
1576
                    if changed or self.include_unchanged:
1461
1577
                        return result
1462
1578
            # handle unversioned specified paths:
1463
1579
            if self.want_unversioned and not path_handled and self.root_dir_info:
1475
1591
                      )
1476
1592
            # If we reach here, the outer flow continues, which enters into the
1477
1593
            # per-root setup logic.
1478
 
        if self.current_dir_info is None and self.current_block is None:
 
1594
        if (self.current_dir_info is None and self.current_block is None and not
 
1595
            self.doing_consistency_expansion):
1479
1596
            # setup iteration of this root:
1480
1597
            self.current_dir_list = None
1481
1598
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1499
1616
                        #            and e.winerror == ERROR_DIRECTORY
1500
1617
                        try:
1501
1618
                            e_winerror = e.winerror
1502
 
                        except AttributeError:
 
1619
                        except AttributeError, _:
1503
1620
                            e_winerror = None
1504
1621
                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
1505
1622
                        if (e.errno in win_errors or e_winerror in win_errors):
1588
1705
                    try:
1589
1706
                        self.current_dir_info = self.dir_iterator.next()
1590
1707
                        self.current_dir_list = self.current_dir_info[1]
1591
 
                    except StopIteration:
 
1708
                    except StopIteration, _:
1592
1709
                        self.current_dir_info = None
1593
1710
                else: #(dircmp > 0)
1594
1711
                    # We have a dirblock entry for this location, but there
1603
1720
                        self.current_block_pos = self.current_block_pos + 1
1604
1721
                        # entry referring to file not present on disk.
1605
1722
                        # advance the entry only, after processing.
1606
 
                        result = self._process_entry(current_entry, None)
1607
 
                        if result is not None:
1608
 
                            if result is not self.uninteresting:
 
1723
                        result, changed = self._process_entry(current_entry, None)
 
1724
                        if changed is not None:
 
1725
                            if changed:
 
1726
                                self._gather_result_for_consistency(result)
 
1727
                            if changed or self.include_unchanged:
1609
1728
                                return result
1610
1729
                    self.block_index = self.block_index + 1
1611
1730
                    self._update_current_block()
1617
1736
            # More supplied paths to process
1618
1737
            self.current_root = None
1619
1738
            return self._iter_next()
 
1739
        # Start expanding more conservatively, adding paths the user may not
 
1740
        # have intended but required for consistent deltas.
 
1741
        self.doing_consistency_expansion = 1
 
1742
        if not self._pending_consistent_entries:
 
1743
            self._pending_consistent_entries = self._next_consistent_entries()
 
1744
        while self._pending_consistent_entries:
 
1745
            result, changed = self._pending_consistent_entries.pop()
 
1746
            if changed is not None:
 
1747
                return result
1620
1748
        raise StopIteration()
1621
1749
 
1622
1750
    cdef object _maybe_tree_ref(self, current_path_info):
1665
1793
                advance_entry = -1
1666
1794
                advance_path = -1
1667
1795
                result = None
 
1796
                changed = None
1668
1797
                path_handled = 0
1669
1798
                if current_entry is None:
1670
1799
                    # unversioned -  the check for path_handled when the path
1672
1801
                    pass
1673
1802
                elif current_path_info is None:
1674
1803
                    # no path is fine: the per entry code will handle it.
1675
 
                    result = self._process_entry(current_entry, current_path_info)
1676
 
                    if result is not None:
1677
 
                        if result is self.uninteresting:
1678
 
                            result = None
 
1804
                    result, changed = self._process_entry(current_entry,
 
1805
                        current_path_info)
1679
1806
                else:
1680
1807
                    minikind = _minikind_from_string(
1681
1808
                        current_entry[1][self.target_index][0])
1696
1823
                        else:
1697
1824
                            # entry referring to file not present on disk.
1698
1825
                            # advance the entry only, after processing.
1699
 
                            result = self._process_entry(current_entry, None)
1700
 
                            if result is not None:
1701
 
                                if result is self.uninteresting:
1702
 
                                    result = None
 
1826
                            result, changed = self._process_entry(current_entry,
 
1827
                                None)
1703
1828
                            advance_path = 0
1704
1829
                    else:
1705
1830
                        # paths are the same, and the dirstate entry is not
1706
1831
                        # absent or renamed.
1707
 
                        result = self._process_entry(current_entry, current_path_info)
1708
 
                        if result is not None:
 
1832
                        result, changed = self._process_entry(current_entry,
 
1833
                            current_path_info)
 
1834
                        if changed is not None:
1709
1835
                            path_handled = -1
1710
 
                            if result is self.uninteresting:
1711
 
                                result = None
 
1836
                            if not changed and not self.include_unchanged:
 
1837
                                changed = None
1712
1838
                # >- loop control starts here:
1713
1839
                # >- entry
1714
1840
                if advance_entry and current_entry is not None:
1727
1853
                                and stat.S_IEXEC & current_path_info[3].st_mode)
1728
1854
                            try:
1729
1855
                                relpath_unicode = self.utf8_decode(current_path_info[0])[0]
1730
 
                            except UnicodeDecodeError:
 
1856
                            except UnicodeDecodeError, _:
1731
1857
                                raise errors.BadFilenameEncoding(
1732
1858
                                    current_path_info[0], osutils._fs_enc)
1733
 
                            if result is not None:
 
1859
                            if changed is not None:
1734
1860
                                raise AssertionError(
1735
1861
                                    "result is not None: %r" % result)
1736
1862
                            result = (None,
1741
1867
                                (None, self.utf8_decode(current_path_info[1])[0]),
1742
1868
                                (None, current_path_info[2]),
1743
1869
                                (None, new_executable))
 
1870
                            changed = True
1744
1871
                        # don't descend into this unversioned path if it is
1745
1872
                        # a dir
1746
1873
                        if current_path_info[2] in ('directory'):
1759
1886
                                current_path_info)
1760
1887
                    else:
1761
1888
                        current_path_info = None
1762
 
                if result is not None:
 
1889
                if changed is not None:
1763
1890
                    # Found a result on this pass, yield it
1764
 
                    return result
 
1891
                    if changed:
 
1892
                        self._gather_result_for_consistency(result)
 
1893
                    if changed or self.include_unchanged:
 
1894
                        return result
1765
1895
            if self.current_block is not None:
1766
1896
                self.block_index = self.block_index + 1
1767
1897
                self._update_current_block()
1771
1901
                try:
1772
1902
                    self.current_dir_info = self.dir_iterator.next()
1773
1903
                    self.current_dir_list = self.current_dir_info[1]
1774
 
                except StopIteration:
 
1904
                except StopIteration, _:
1775
1905
                    self.current_dir_info = None
 
1906
 
 
1907
    cdef object _next_consistent_entries(self):
 
1908
        """Grabs the next specific file parent case to consider.
 
1909
        
 
1910
        :return: A list of the results, each of which is as for _process_entry.
 
1911
        """
 
1912
        results = []
 
1913
        while self.search_specific_file_parents:
 
1914
            # Process the parent directories for the paths we were iterating.
 
1915
            # Even in extremely large trees this should be modest, so currently
 
1916
            # no attempt is made to optimise.
 
1917
            path_utf8 = self.search_specific_file_parents.pop()
 
1918
            if path_utf8 in self.searched_exact_paths:
 
1919
                # We've examined this path.
 
1920
                continue
 
1921
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
1922
                # We've examined this path.
 
1923
                continue
 
1924
            path_entries = self.state._entries_for_path(path_utf8)
 
1925
            # We need either one or two entries. If the path in
 
1926
            # self.target_index has moved (so the entry in source_index is in
 
1927
            # 'ar') then we need to also look for the entry for this path in
 
1928
            # self.source_index, to output the appropriate delete-or-rename.
 
1929
            selected_entries = []
 
1930
            found_item = False
 
1931
            for candidate_entry in path_entries:
 
1932
                # Find entries present in target at this path:
 
1933
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
1934
                    found_item = True
 
1935
                    selected_entries.append(candidate_entry)
 
1936
                # Find entries present in source at this path:
 
1937
                elif (self.source_index is not None and
 
1938
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
1939
                    found_item = True
 
1940
                    if candidate_entry[1][self.target_index][0] == 'a':
 
1941
                        # Deleted, emit it here.
 
1942
                        selected_entries.append(candidate_entry)
 
1943
                    else:
 
1944
                        # renamed, emit it when we process the directory it
 
1945
                        # ended up at.
 
1946
                        self.search_specific_file_parents.add(
 
1947
                            candidate_entry[1][self.target_index][1])
 
1948
            if not found_item:
 
1949
                raise AssertionError(
 
1950
                    "Missing entry for specific path parent %r, %r" % (
 
1951
                    path_utf8, path_entries))
 
1952
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
1953
            for entry in selected_entries:
 
1954
                if entry[0][2] in self.seen_ids:
 
1955
                    continue
 
1956
                result, changed = self._process_entry(entry, path_info)
 
1957
                if changed is None:
 
1958
                    raise AssertionError(
 
1959
                        "Got entry<->path mismatch for specific path "
 
1960
                        "%r entry %r path_info %r " % (
 
1961
                        path_utf8, entry, path_info))
 
1962
                # Only include changes - we're outside the users requested
 
1963
                # expansion.
 
1964
                if changed:
 
1965
                    self._gather_result_for_consistency(result)
 
1966
                    if (result[6][0] == 'directory' and
 
1967
                        result[6][1] != 'directory'):
 
1968
                        # This stopped being a directory, the old children have
 
1969
                        # to be included.
 
1970
                        if entry[1][self.source_index][0] == 'r':
 
1971
                            # renamed, take the source path
 
1972
                            entry_path_utf8 = entry[1][self.source_index][1]
 
1973
                        else:
 
1974
                            entry_path_utf8 = path_utf8
 
1975
                        initial_key = (entry_path_utf8, '', '')
 
1976
                        block_index, _ = self.state._find_block_index_from_key(
 
1977
                            initial_key)
 
1978
                        if block_index == 0:
 
1979
                            # The children of the root are in block index 1.
 
1980
                            block_index = block_index + 1
 
1981
                        current_block = None
 
1982
                        if block_index < len(self.state._dirblocks):
 
1983
                            current_block = self.state._dirblocks[block_index]
 
1984
                            if not osutils.is_inside(
 
1985
                                entry_path_utf8, current_block[0]):
 
1986
                                # No entries for this directory at all.
 
1987
                                current_block = None
 
1988
                        if current_block is not None:
 
1989
                            for entry in current_block[1]:
 
1990
                                if entry[1][self.source_index][0] in 'ar':
 
1991
                                    # Not in the source tree, so doesn't have to be
 
1992
                                    # included.
 
1993
                                    continue
 
1994
                                # Path of the entry itself.
 
1995
                                self.search_specific_file_parents.add(
 
1996
                                    self.pathjoin(*entry[0][:2]))
 
1997
                if changed or self.include_unchanged:
 
1998
                    results.append((result, changed))
 
1999
            self.searched_exact_paths.add(path_utf8)
 
2000
        return results
 
2001
 
 
2002
    cdef object _path_info(self, utf8_path, unicode_path):
 
2003
        """Generate path_info for unicode_path.
 
2004
 
 
2005
        :return: None if unicode_path does not exist, or a path_info tuple.
 
2006
        """
 
2007
        abspath = self.tree.abspath(unicode_path)
 
2008
        try:
 
2009
            stat = os.lstat(abspath)
 
2010
        except OSError, e:
 
2011
            if e.errno == errno.ENOENT:
 
2012
                # the path does not exist.
 
2013
                return None
 
2014
            else:
 
2015
                raise
 
2016
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
2017
        dir_info = (utf8_path, utf8_basename,
 
2018
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
2019
            abspath)
 
2020
        if dir_info[2] == 'directory':
 
2021
            if self.tree._directory_is_tree_reference(
 
2022
                unicode_path):
 
2023
                self.root_dir_info = self.root_dir_info[:2] + \
 
2024
                    ('tree-reference',) + self.root_dir_info[3:]
 
2025
        return dir_info