~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_c.pyx

  • Committer: John Arbash Meinel
  • Date: 2009-06-04 16:50:33 UTC
  • mto: This revision was merged to the branch mainline in revision 4410.
  • Revision ID: john@arbash-meinel.com-20090604165033-bfdo0lyf4yt4vjcz
We don't need a base Coder class, because Decoder._update_tail is different than Encoder._update_tail.
(one adds, one subtracts from self.size).
So we now have 2 versions of the macro, and the test suite stops crashing... :)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007-2010 Canonical Ltd
 
1
# Copyright (C) 2007, 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
28
28
 
29
29
from bzrlib import cache_utf8, errors, osutils
30
30
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import parent_directories, pathjoin, splitpath
 
31
from bzrlib.osutils import pathjoin, splitpath
32
32
 
33
33
 
34
34
# This is the Windows equivalent of ENOTDIR
54
54
cdef extern from *:
55
55
    ctypedef unsigned long size_t
56
56
 
57
 
cdef extern from "_dirstate_helpers_pyx.h":
 
57
cdef extern from "_dirstate_helpers_c.h":
58
58
    ctypedef int intptr_t
59
59
 
60
60
 
97
97
    object PyTuple_GetItem_void_object "PyTuple_GET_ITEM" (void* tpl, int index)
98
98
    object PyTuple_GET_ITEM(object tpl, Py_ssize_t index)
99
99
 
100
 
    unsigned long PyInt_AsUnsignedLongMask(object number) except? -1
101
100
 
102
101
    char *PyString_AsString(object p)
103
102
    char *PyString_AsString_obj "PyString_AsString" (PyObject *string)
119
118
    # ??? memrchr is a GNU extension :(
120
119
    # void *memrchr(void *s, int c, size_t len)
121
120
 
122
 
# cimport all of the definitions we will need to access
123
 
from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \
124
 
    StaticTuple_New, StaticTuple_SET_ITEM
125
 
 
126
 
import_static_tuple_c()
127
 
 
128
 
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
 
121
 
 
122
cdef void* _my_memrchr(void *s, int c, size_t n):
129
123
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
130
124
    cdef char *pos
131
125
    cdef char *start
162
156
        return None
163
157
    return <char*>found - <char*>_s
164
158
 
165
 
 
166
159
cdef object safe_string_from_size(char *s, Py_ssize_t size):
167
160
    if size < 0:
 
161
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
168
162
        raise AssertionError(
169
 
            'tried to create a string with an invalid size: %d'
170
 
            % (size))
 
163
            'tried to create a string with an invalid size: %d @0x%x'
 
164
            % (size, <int>s))
171
165
    return PyString_FromStringAndSize(s, size)
172
166
 
173
167
 
174
 
cdef int _is_aligned(void *ptr): # cannot_raise
 
168
cdef int _is_aligned(void *ptr):
175
169
    """Is this pointer aligned to an integer size offset?
176
170
 
177
171
    :return: 1 if this pointer is aligned, 0 otherwise.
179
173
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
180
174
 
181
175
 
182
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
 
176
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
183
177
    cdef unsigned char *cur1
184
178
    cdef unsigned char *cur2
185
179
    cdef unsigned char *end1
243
237
    return 0
244
238
 
245
239
 
246
 
def cmp_by_dirs(path1, path2):
 
240
def cmp_by_dirs_c(path1, path2):
247
241
    """Compare two paths directory by directory.
248
242
 
249
243
    This is equivalent to doing::
272
266
                        PyString_Size(path2))
273
267
 
274
268
 
275
 
def _cmp_path_by_dirblock(path1, path2):
 
269
def _cmp_path_by_dirblock_c(path1, path2):
276
270
    """Compare two paths based on what directory they are in.
277
271
 
278
272
    This generates a sort order, such that all children of a directory are
294
288
    if not PyString_CheckExact(path2):
295
289
        raise TypeError("'path2' must be a plain string, not %s: %r"
296
290
                        % (type(path2), path2))
297
 
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
298
 
                                        PyString_Size(path1),
299
 
                                        PyString_AsString(path2),
300
 
                                        PyString_Size(path2))
301
 
 
302
 
 
303
 
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
304
 
                                      char *path2, int path2_len): # cannot_raise
 
291
    return _cmp_path_by_dirblock(PyString_AsString(path1),
 
292
                                 PyString_Size(path1),
 
293
                                 PyString_AsString(path2),
 
294
                                 PyString_Size(path2))
 
295
 
 
296
 
 
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
 
298
                               char *path2, int path2_len):
305
299
    """Compare two paths by what directory they are in.
306
300
 
307
 
    see ``_cmp_path_by_dirblock`` for details.
 
301
    see ``_cmp_path_by_dirblock_c`` for details.
308
302
    """
309
303
    cdef char *dirname1
310
304
    cdef int dirname1_len
374
368
    return 1
375
369
 
376
370
 
377
 
def _bisect_path_left(paths, path):
 
371
def _bisect_path_left_c(paths, path):
378
372
    """Return the index where to insert path into paths.
379
373
 
380
374
    This uses a path-wise comparison so we get::
419
413
        cur = PyList_GetItem_object_void(paths, _mid)
420
414
        cur_cstr = PyString_AS_STRING_void(cur)
421
415
        cur_size = PyString_GET_SIZE_void(cur)
422
 
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
423
 
                                        path_cstr, path_size) < 0:
 
416
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
424
417
            _lo = _mid + 1
425
418
        else:
426
419
            _hi = _mid
427
420
    return _lo
428
421
 
429
422
 
430
 
def _bisect_path_right(paths, path):
 
423
def _bisect_path_right_c(paths, path):
431
424
    """Return the index where to insert path into paths.
432
425
 
433
426
    This uses a path-wise comparison so we get::
472
465
        cur = PyList_GetItem_object_void(paths, _mid)
473
466
        cur_cstr = PyString_AS_STRING_void(cur)
474
467
        cur_size = PyString_GET_SIZE_void(cur)
475
 
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
476
 
                                        cur_cstr, cur_size) < 0:
 
468
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
477
469
            _hi = _mid
478
470
        else:
479
471
            _lo = _mid + 1
480
472
    return _lo
481
473
 
482
474
 
483
 
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
 
475
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
484
476
    """Return the index where to insert dirname into the dirblocks.
485
477
 
486
478
    The return value idx is such that all directory blocks in dirblock[:idx]
616
608
        :param new_block: This is to let the caller know that it needs to
617
609
            create a new directory block to store the next entry.
618
610
        """
619
 
        cdef StaticTuple path_name_file_id_key
620
 
        cdef StaticTuple tmp
 
611
        cdef object path_name_file_id_key
621
612
        cdef char *entry_size_cstr
622
613
        cdef unsigned long int entry_size
623
614
        cdef char* executable_cstr
657
648
        # Build up the key that will be used.
658
649
        # By using <object>(void *) Pyrex will automatically handle the
659
650
        # Py_INCREF that we need.
660
 
        cur_dirname = <object>p_current_dirname[0]
661
 
        # Use StaticTuple_New to pre-allocate, rather than creating a regular
662
 
        # tuple and passing it to the StaticTuple constructor.
663
 
        # path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
664
 
        #                          self.get_next_str(),
665
 
        #                          self.get_next_str(),
666
 
        #                         )
667
 
        tmp = StaticTuple_New(3)
668
 
        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
669
 
        cur_basename = self.get_next_str()
670
 
        cur_file_id = self.get_next_str()
671
 
        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
672
 
        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
673
 
        path_name_file_id_key = tmp
 
651
        path_name_file_id_key = (<object>p_current_dirname[0],
 
652
                                 self.get_next_str(),
 
653
                                 self.get_next_str(),
 
654
                                )
674
655
 
675
656
        # Parse all of the per-tree information. current has the information in
676
657
        # the same location as parent trees. The only difference is that 'info'
694
675
            executable_cstr = self.get_next(&cur_size)
695
676
            is_executable = (executable_cstr[0] == c'y')
696
677
            info = self.get_next_str()
697
 
            # TODO: If we want to use StaticTuple_New here we need to be pretty
698
 
            #       careful. We are relying on a bit of Pyrex
699
 
            #       automatic-conversion from 'int' to PyInt, and that doesn't
700
 
            #       play well with the StaticTuple_SET_ITEM macro.
701
 
            #       Timing doesn't (yet) show a worthwhile improvement in speed
702
 
            #       versus complexity and maintainability.
703
 
            # tmp = StaticTuple_New(5)
704
 
            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
705
 
            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
706
 
            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
707
 
            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
708
 
            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
709
 
            # PyList_Append(trees, tmp)
710
 
            PyList_Append(trees, StaticTuple(
 
678
            PyList_Append(trees, (
711
679
                minikind,     # minikind
712
680
                fingerprint,  # fingerprint
713
681
                entry_size,   # size
776
744
        self.state._split_root_dirblock_into_contents()
777
745
 
778
746
 
779
 
def _read_dirblocks(state):
 
747
def _read_dirblocks_c(state):
780
748
    """Read in the dirblocks for the given DirState object.
781
749
 
782
750
    This is tightly bound to the DirState internal representation. It should be
798
766
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
799
767
 
800
768
 
801
 
cdef int minikind_from_mode(int mode): # cannot_raise
 
769
cdef int minikind_from_mode(int mode):
802
770
    # in order of frequency:
803
771
    if S_ISREG(mode):
804
772
        return c"f"
812
780
_encode = binascii.b2a_base64
813
781
 
814
782
 
 
783
from struct import pack
815
784
cdef _pack_stat(stat_value):
816
785
    """return a string representing the stat value's key fields.
817
786
 
821
790
    cdef char result[6*4] # 6 long ints
822
791
    cdef int *aliased
823
792
    aliased = <int *>result
824
 
    aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size))
825
 
    # mtime and ctime will often be floats but get converted to PyInt within
826
 
    aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime))
827
 
    aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime))
828
 
    aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev))
829
 
    aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino))
830
 
    aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode))
 
793
    aliased[0] = htonl(stat_value.st_size)
 
794
    aliased[1] = htonl(int(stat_value.st_mtime))
 
795
    aliased[2] = htonl(int(stat_value.st_ctime))
 
796
    aliased[3] = htonl(stat_value.st_dev)
 
797
    aliased[4] = htonl(stat_value.st_ino & 0xFFFFFFFF)
 
798
    aliased[5] = htonl(stat_value.st_mode)
831
799
    packed = PyString_FromStringAndSize(result, 6*4)
832
800
    return _encode(packed)[:-1]
833
801
 
834
802
 
835
 
def pack_stat(stat_value):
836
 
    """Convert stat value into a packed representation quickly with pyrex"""
837
 
    return _pack_stat(stat_value)
838
 
 
839
 
 
840
803
def update_entry(self, entry, abspath, stat_value):
841
804
    """Update the entry based on what is actually on disk.
842
805
 
872
835
    # _st mode of the compiled stat objects.
873
836
    cdef int minikind, saved_minikind
874
837
    cdef void * details
875
 
    cdef int worth_saving
876
838
    minikind = minikind_from_mode(stat_value.st_mode)
877
839
    if 0 == minikind:
878
840
        return None
907
869
    # If we have gotten this far, that means that we need to actually
908
870
    # process this entry.
909
871
    link_or_sha1 = None
910
 
    worth_saving = 1
911
872
    if minikind == c'f':
912
873
        executable = self._is_executable(stat_value.st_mode,
913
874
                                         saved_executable)
924
885
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
925
886
                           executable, packed_stat)
926
887
        else:
927
 
            # This file is not worth caching the sha1. Either it is too new, or
928
 
            # it is newly added. Regardless, the only things we are changing
929
 
            # are derived from the stat, and so are not worth caching. So we do
930
 
            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
931
 
            # updated values if there is *other* data worth saving.)
932
 
            entry[1][0] = ('f', '', stat_value.st_size, executable,
933
 
                           DirState.NULLSTAT)
934
 
            worth_saving = 0
 
888
            entry[1][0] = ('f', '', stat_value.st_size,
 
889
                           executable, DirState.NULLSTAT)
935
890
    elif minikind == c'd':
 
891
        link_or_sha1 = None
936
892
        entry[1][0] = ('d', '', 0, False, packed_stat)
937
893
        if saved_minikind != c'd':
938
894
            # This changed from something into a directory. Make sure we
942
898
                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
943
899
            self._ensure_block(block_index, entry_index,
944
900
                               pathjoin(entry[0][0], entry[0][1]))
945
 
        else:
946
 
            # Any changes are derived trivially from the stat object, not worth
947
 
            # re-writing a dirstate for just this
948
 
            worth_saving = 0
949
901
    elif minikind == c'l':
950
 
        if saved_minikind == c'l':
951
 
            # If the object hasn't changed kind, it isn't worth saving the
952
 
            # dirstate just for a symlink. The default is 'fast symlinks' which
953
 
            # save the target in the inode entry, rather than separately. So to
954
 
            # stat, we've already read everything off disk.
955
 
            worth_saving = 0
956
902
        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
957
903
        if self._cutoff_time is None:
958
904
            self._sha_cutoff_time()
963
909
        else:
964
910
            entry[1][0] = ('l', '', stat_value.st_size,
965
911
                           False, DirState.NULLSTAT)
966
 
    if worth_saving:
967
 
        # Note, even though _mark_modified will only set
968
 
        # IN_MEMORY_HASH_MODIFIED, it still isn't worth calling it when the
        # changes are not worth saving.
969
 
        self._mark_modified([entry])
 
912
    self._dirblock_state = DirState.IN_MEMORY_MODIFIED
970
913
    return link_or_sha1
971
914
 
972
915
 
973
 
# TODO: Do we want to worry about exceptions here?
974
 
cdef char _minikind_from_string(object string) except? -1:
 
916
cdef char _minikind_from_string(object string):
975
917
    """Convert a python string to a char."""
976
918
    return PyString_AsString(string)[0]
977
919
 
1009
951
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
1010
952
 
1011
953
 
1012
 
cdef int _versioned_minikind(char minikind): # cannot_raise
 
954
cdef int _versioned_minikind(char minikind):
1013
955
    """Return non-zero if minikind is in fltd"""
1014
956
    return (minikind == c'f' or
1015
957
            minikind == c'd' or
1019
961
 
1020
962
cdef class ProcessEntryC:
1021
963
 
1022
 
    cdef int doing_consistency_expansion
1023
964
    cdef object old_dirname_to_file_id # dict
1024
965
    cdef object new_dirname_to_file_id # dict
 
966
    cdef readonly object uninteresting
1025
967
    cdef object last_source_parent
1026
968
    cdef object last_target_parent
1027
 
    cdef int include_unchanged
1028
 
    cdef int partial
 
969
    cdef object include_unchanged
1029
970
    cdef object use_filesystem_for_exec
1030
971
    cdef object utf8_decode
1031
972
    cdef readonly object searched_specific_files
1032
 
    cdef readonly object searched_exact_paths
1033
973
    cdef object search_specific_files
1034
 
    # The parents up to the root of the paths we are searching.
1035
 
    # After all normal paths are returned, these specific items are returned.
1036
 
    cdef object search_specific_file_parents
1037
974
    cdef object state
1038
975
    # Current iteration variables:
1039
976
    cdef object current_root
1051
988
    cdef object current_block_list
1052
989
    cdef object current_dir_info
1053
990
    cdef object current_dir_list
1054
 
    cdef object _pending_consistent_entries # list
1055
991
    cdef int path_index
1056
992
    cdef object root_dir_info
1057
993
    cdef object bisect_left
1058
994
    cdef object pathjoin
1059
995
    cdef object fstat
1060
 
    # A set of the ids we've output when doing partial output.
1061
 
    cdef object seen_ids
1062
996
    cdef object sha_file
1063
997
 
1064
998
    def __init__(self, include_unchanged, use_filesystem_for_exec,
1065
999
        search_specific_files, state, source_index, target_index,
1066
1000
        want_unversioned, tree):
1067
 
        self.doing_consistency_expansion = 0
1068
1001
        self.old_dirname_to_file_id = {}
1069
1002
        self.new_dirname_to_file_id = {}
1070
 
        # Are we doing a partial iter_changes?
1071
 
        self.partial = set(['']).__ne__(search_specific_files)
 
1003
        # Just a sentry, so that _process_entry can say that this
 
1004
        # record is handled, but isn't interesting to process (unchanged)
 
1005
        self.uninteresting = object()
1072
1006
        # Using a list so that we can access the values and change them in
1073
1007
        # nested scope. Each one is [path, file_id, entry]
1074
1008
        self.last_source_parent = [None, None]
1075
1009
        self.last_target_parent = [None, None]
1076
 
        if include_unchanged is None:
1077
 
            self.include_unchanged = False
1078
 
        else:
1079
 
            self.include_unchanged = int(include_unchanged)
 
1010
        self.include_unchanged = include_unchanged
1080
1011
        self.use_filesystem_for_exec = use_filesystem_for_exec
1081
1012
        self.utf8_decode = cache_utf8._utf8_decode
1082
1013
        # for all search_indexs in each path at or under each element of
1083
 
        # search_specific_files, if the detail is relocated: add the id, and
1084
 
        # add the relocated path as one to search if its not searched already.
1085
 
        # If the detail is not relocated, add the id.
 
1014
        # search_specific_files, if the detail is relocated: add the id, and add the
 
1015
        # relocated path as one to search if its not searched already. If the
 
1016
        # detail is not relocated, add the id.
1086
1017
        self.searched_specific_files = set()
1087
 
        # When we search exact paths without expanding downwards, we record
1088
 
        # that here.
1089
 
        self.searched_exact_paths = set()
1090
1018
        self.search_specific_files = search_specific_files
1091
 
        # The parents up to the root of the paths we are searching.
1092
 
        # After all normal paths are returned, these specific items are returned.
1093
 
        self.search_specific_file_parents = set()
1094
 
        # The ids we've sent out in the delta.
1095
 
        self.seen_ids = set()
1096
1019
        self.state = state
1097
1020
        self.current_root = None
1098
1021
        self.current_root_unicode = None
1114
1037
        self.current_block_pos = -1
1115
1038
        self.current_dir_info = None
1116
1039
        self.current_dir_list = None
1117
 
        self._pending_consistent_entries = []
1118
1040
        self.path_index = 0
1119
1041
        self.root_dir_info = None
1120
1042
        self.bisect_left = bisect.bisect_left
1121
1043
        self.pathjoin = osutils.pathjoin
1122
1044
        self.fstat = os.fstat
1123
1045
        self.sha_file = osutils.sha_file
1124
 
        if target_index != 0:
1125
 
            # A lot of code in here depends on target_index == 0
1126
 
            raise errors.BzrError('unsupported target index')
1127
1046
 
1128
1047
    cdef _process_entry(self, entry, path_info):
1129
1048
        """Compare an entry and real disk to generate delta information.
1130
1049
 
1131
1050
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1132
 
            the path of entry. If None, then the path is considered absent in 
1133
 
            the target (Perhaps we should pass in a concrete entry for this ?)
 
1051
            the path of entry. If None, then the path is considered absent.
 
1052
            (Perhaps we should pass in a concrete entry for this ?)
1134
1053
            Basename is returned as a utf8 string because we expect this
1135
1054
            tuple will be ignored, and don't want to take the time to
1136
1055
            decode.
1137
 
        :return: (iter_changes_result, changed). If the entry has not been
1138
 
            handled then changed is None. Otherwise it is False if no content
1139
 
            or metadata changes have occurred, and True if any content or
1140
 
            metadata change has occurred. If self.include_unchanged is True then
1141
 
            if changed is not None, iter_changes_result will always be a result
1142
 
            tuple. Otherwise, iter_changes_result is None unless changed is
1143
 
            True.
 
1056
        :return: None if these don't match
 
1057
                 A tuple of information about the change, or
 
1058
                 the object 'uninteresting' if these match, but are
 
1059
                 basically identical.
1144
1060
        """
1145
1061
        cdef char target_minikind
1146
1062
        cdef char source_minikind
1182
1098
            else:
1183
1099
                # add the source to the search path to find any children it
1184
1100
                # has.  TODO ? : only add if it is a container ?
1185
 
                if (not self.doing_consistency_expansion and 
1186
 
                    not osutils.is_inside_any(self.searched_specific_files,
1187
 
                                             source_details[1])):
 
1101
                if not osutils.is_inside_any(self.searched_specific_files,
 
1102
                                             source_details[1]):
1188
1103
                    self.search_specific_files.add(source_details[1])
1189
 
                    # expanding from a user requested path, parent expansion
1190
 
                    # for delta consistency happens later.
1191
1104
                # generate the old path; this is needed for stating later
1192
1105
                # as well.
1193
1106
                old_path = source_details[1]
1227
1140
                    if source_minikind != c'f':
1228
1141
                        content_change = 1
1229
1142
                    else:
1230
 
                        # Check the sha. We can't just rely on the size as
1231
 
                        # content filtering may mean different sizes actually
1232
 
                        # map to the same content
1233
 
                        if link_or_sha1 is None:
1234
 
                            # Stat cache miss:
1235
 
                            statvalue, link_or_sha1 = \
1236
 
                                self.state._sha1_provider.stat_and_sha1(
1237
 
                                path_info[4])
1238
 
                            self.state._observed_sha1(entry, link_or_sha1,
1239
 
                                statvalue)
1240
 
                        content_change = (link_or_sha1 != source_details[1])
 
1143
                        # If the size is the same, check the sha:
 
1144
                        if target_details[2] == source_details[2]:
 
1145
                            if link_or_sha1 is None:
 
1146
                                # Stat cache miss:
 
1147
                                statvalue, link_or_sha1 = \
 
1148
                                    self.state._sha1_provider.stat_and_sha1(
 
1149
                                    path_info[4])
 
1150
                                self.state._observed_sha1(entry, link_or_sha1,
 
1151
                                    statvalue)
 
1152
                            content_change = (link_or_sha1 != source_details[1])
 
1153
                        else:
 
1154
                            # Size changed, so must be different
 
1155
                            content_change = 1
1241
1156
                    # Target details is updated at update_entry time
1242
1157
                    if self.use_filesystem_for_exec:
1243
1158
                        # We don't need S_ISREG here, because we are sure
1258
1173
                        content_change = 0
1259
1174
                    target_exec = False
1260
1175
                else:
1261
 
                    if path is None:
1262
 
                        path = self.pathjoin(old_dirname, old_basename)
1263
 
                    raise errors.BadFileKindError(path, path_info[2])
 
1176
                    raise Exception, "unknown kind %s" % path_info[2]
1264
1177
            if source_minikind == c'd':
1265
1178
                if path is None:
1266
1179
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1268
1181
                    file_id = entry[0][2]
1269
1182
                self.old_dirname_to_file_id[old_path] = file_id
1270
1183
            # parent id is the entry for the path in the target tree
1271
 
            if old_basename and old_dirname == self.last_source_parent[0]:
1272
 
                # use a cached hit for non-root source entries.
 
1184
            if old_dirname == self.last_source_parent[0]:
1273
1185
                source_parent_id = self.last_source_parent[1]
1274
1186
            else:
1275
1187
                try:
1276
1188
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
1277
 
                except KeyError, _:
 
1189
                except KeyError:
1278
1190
                    source_parent_entry = self.state._get_entry(self.source_index,
1279
1191
                                                           path_utf8=old_dirname)
1280
1192
                    source_parent_id = source_parent_entry[0][2]
1285
1197
                    self.last_source_parent[0] = old_dirname
1286
1198
                    self.last_source_parent[1] = source_parent_id
1287
1199
            new_dirname = entry[0][0]
1288
 
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
1289
 
                # use a cached hit for non-root target entries.
 
1200
            if new_dirname == self.last_target_parent[0]:
1290
1201
                target_parent_id = self.last_target_parent[1]
1291
1202
            else:
1292
1203
                try:
1293
1204
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
1294
 
                except KeyError, _:
 
1205
                except KeyError:
1295
1206
                    # TODO: We don't always need to do the lookup, because the
1296
1207
                    #       parent entry will be the same as the source entry.
1297
1208
                    target_parent_entry = self.state._get_entry(self.target_index,
1309
1220
                    self.last_target_parent[1] = target_parent_id
1310
1221
 
1311
1222
            source_exec = source_details[3]
1312
 
            changed = (content_change
 
1223
            if (self.include_unchanged
 
1224
                or content_change
1313
1225
                or source_parent_id != target_parent_id
1314
1226
                or old_basename != entry[0][1]
1315
1227
                or source_exec != target_exec
1316
 
                )
1317
 
            if not changed and not self.include_unchanged:
1318
 
                return None, False
1319
 
            else:
 
1228
                ):
1320
1229
                if old_path is None:
1321
1230
                    path = self.pathjoin(old_dirname, old_basename)
1322
1231
                    old_path = path
1336
1245
                       (source_parent_id, target_parent_id),
1337
1246
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1338
1247
                       (source_kind, target_kind),
1339
 
                       (source_exec, target_exec)), changed
 
1248
                       (source_exec, target_exec))
 
1249
            else:
 
1250
                return self.uninteresting
1340
1251
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1341
1252
            # looks like a new file
1342
1253
            path = self.pathjoin(entry[0][0], entry[0][1])
1369
1280
                       (None, parent_id),
1370
1281
                       (None, self.utf8_decode(entry[0][1])[0]),
1371
1282
                       (None, path_info[2]),
1372
 
                       (None, target_exec)), True
 
1283
                       (None, target_exec))
1373
1284
            else:
1374
1285
                # It's a missing file, report it as such.
1375
1286
                return (entry[0][2],
1379
1290
                       (None, parent_id),
1380
1291
                       (None, self.utf8_decode(entry[0][1])[0]),
1381
1292
                       (None, None),
1382
 
                       (None, False)), True
 
1293
                       (None, False))
1383
1294
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1384
1295
            # unversioned, possibly, or possibly not deleted: we don't care.
1385
1296
            # if it's still on disk, *and* there's no other entry at this
1397
1308
                   (parent_id, None),
1398
1309
                   (self.utf8_decode(entry[0][1])[0], None),
1399
1310
                   (_minikind_to_kind(source_minikind), None),
1400
 
                   (source_details[3], None)), True
 
1311
                   (source_details[3], None))
1401
1312
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1402
1313
            # a rename; could be a true rename, or a rename inherited from
1403
1314
            # a renamed parent. TODO: handle this efficiently. Its not
1404
1315
            # common case to rename dirs though, so a correct but slow
1405
1316
            # implementation will do.
1406
 
            if (not self.doing_consistency_expansion and 
1407
 
                not osutils.is_inside_any(self.searched_specific_files,
1408
 
                    target_details[1])):
 
1317
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1409
1318
                self.search_specific_files.add(target_details[1])
1410
 
                # We don't expand the specific files parents list here as
1411
 
                # the path is absent in target and won't create a delta with
1412
 
                # missing parent.
1413
1319
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1414
1320
              (target_minikind == c'r' or target_minikind == c'a')):
1415
1321
            # neither of the selected trees contain this path,
1421
1327
                "source_minikind=%r, target_minikind=%r"
1422
1328
                % (source_minikind, target_minikind))
1423
1329
            ## import pdb;pdb.set_trace()
1424
 
        return None, None
 
1330
        return None
1425
1331
 
1426
1332
    def __iter__(self):
1427
1333
        return self
1429
1335
    def iter_changes(self):
1430
1336
        return self
1431
1337
 
1432
 
    cdef int _gather_result_for_consistency(self, result) except -1:
1433
 
        """Check a result we will yield to make sure we are consistent later.
1434
 
        
1435
 
        This gathers result's parents into a set to output later.
1436
 
 
1437
 
        :param result: A result tuple.
1438
 
        """
1439
 
        if not self.partial or not result[0]:
1440
 
            return 0
1441
 
        self.seen_ids.add(result[0])
1442
 
        new_path = result[1][1]
1443
 
        if new_path:
1444
 
            # Not the root and not a delete: queue up the parents of the path.
1445
 
            self.search_specific_file_parents.update(
1446
 
                osutils.parent_directories(new_path.encode('utf8')))
1447
 
            # Add the root directory which parent_directories does not
1448
 
            # provide.
1449
 
            self.search_specific_file_parents.add('')
1450
 
        return 0
1451
 
 
1452
 
    cdef int _update_current_block(self) except -1:
 
1338
    cdef void _update_current_block(self):
1453
1339
        if (self.block_index < len(self.state._dirblocks) and
1454
1340
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1455
1341
            self.current_block = self.state._dirblocks[self.block_index]
1458
1344
        else:
1459
1345
            self.current_block = None
1460
1346
            self.current_block_list = None
1461
 
        return 0
1462
1347
 
1463
1348
    def __next__(self):
1464
1349
        # Simple thunk to allow tail recursion without pyrex confusion
1516
1401
        cdef char * current_dirname_c, * current_blockname_c
1517
1402
        cdef int advance_entry, advance_path
1518
1403
        cdef int path_handled
 
1404
        uninteresting = self.uninteresting
1519
1405
        searched_specific_files = self.searched_specific_files
1520
1406
        # Are we walking a root?
1521
1407
        while self.root_entries_pos < self.root_entries_len:
1522
1408
            entry = self.root_entries[self.root_entries_pos]
1523
1409
            self.root_entries_pos = self.root_entries_pos + 1
1524
 
            result, changed = self._process_entry(entry, self.root_dir_info)
1525
 
            if changed is not None:
1526
 
                if changed:
1527
 
                    self._gather_result_for_consistency(result)
1528
 
                if changed or self.include_unchanged:
1529
 
                    return result
 
1410
            result = self._process_entry(entry, self.root_dir_info)
 
1411
            if result is not None and result is not self.uninteresting:
 
1412
                return result
1530
1413
        # Have we finished the prior root, or never started one ?
1531
1414
        if self.current_root is None:
1532
1415
            # TODO: the pending list should be lexically sorted?  the
1533
1416
            # interface doesn't require it.
1534
1417
            try:
1535
1418
                self.current_root = self.search_specific_files.pop()
1536
 
            except KeyError, _:
 
1419
            except KeyError:
1537
1420
                raise StopIteration()
 
1421
            self.current_root_unicode = self.current_root.decode('utf8')
1538
1422
            self.searched_specific_files.add(self.current_root)
1539
1423
            # process the entries for this containing directory: the rest will be
1540
1424
            # found by their parents recursively.
1541
1425
            self.root_entries = self.state._entries_for_path(self.current_root)
1542
1426
            self.root_entries_len = len(self.root_entries)
1543
 
            self.current_root_unicode = self.current_root.decode('utf8')
1544
1427
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1545
1428
            try:
1546
1429
                root_stat = os.lstat(self.root_abspath)
1574
1457
            while self.root_entries_pos < self.root_entries_len:
1575
1458
                entry = self.root_entries[self.root_entries_pos]
1576
1459
                self.root_entries_pos = self.root_entries_pos + 1
1577
 
                result, changed = self._process_entry(entry, self.root_dir_info)
1578
 
                if changed is not None:
 
1460
                result = self._process_entry(entry, self.root_dir_info)
 
1461
                if result is not None:
1579
1462
                    path_handled = -1
1580
 
                    if changed:
1581
 
                        self._gather_result_for_consistency(result)
1582
 
                    if changed or self.include_unchanged:
 
1463
                    if result is not self.uninteresting:
1583
1464
                        return result
1584
1465
            # handle unversioned specified paths:
1585
1466
            if self.want_unversioned and not path_handled and self.root_dir_info:
1597
1478
                      )
1598
1479
            # If we reach here, the outer flow continues, which enters into the
1599
1480
            # per-root setup logic.
1600
 
        if (self.current_dir_info is None and self.current_block is None and not
1601
 
            self.doing_consistency_expansion):
 
1481
        if self.current_dir_info is None and self.current_block is None:
1602
1482
            # setup iteration of this root:
1603
1483
            self.current_dir_list = None
1604
1484
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1622
1502
                        #            and e.winerror == ERROR_DIRECTORY
1623
1503
                        try:
1624
1504
                            e_winerror = e.winerror
1625
 
                        except AttributeError, _:
 
1505
                        except AttributeError:
1626
1506
                            e_winerror = None
1627
1507
                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
1628
1508
                        if (e.errno in win_errors or e_winerror in win_errors):
1711
1591
                    try:
1712
1592
                        self.current_dir_info = self.dir_iterator.next()
1713
1593
                        self.current_dir_list = self.current_dir_info[1]
1714
 
                    except StopIteration, _:
 
1594
                    except StopIteration:
1715
1595
                        self.current_dir_info = None
1716
1596
                else: #(dircmp > 0)
1717
1597
                    # We have a dirblock entry for this location, but there
1726
1606
                        self.current_block_pos = self.current_block_pos + 1
1727
1607
                        # entry referring to file not present on disk.
1728
1608
                        # advance the entry only, after processing.
1729
 
                        result, changed = self._process_entry(current_entry, None)
1730
 
                        if changed is not None:
1731
 
                            if changed:
1732
 
                                self._gather_result_for_consistency(result)
1733
 
                            if changed or self.include_unchanged:
 
1609
                        result = self._process_entry(current_entry, None)
 
1610
                        if result is not None:
 
1611
                            if result is not self.uninteresting:
1734
1612
                                return result
1735
1613
                    self.block_index = self.block_index + 1
1736
1614
                    self._update_current_block()
1742
1620
            # More supplied paths to process
1743
1621
            self.current_root = None
1744
1622
            return self._iter_next()
1745
 
        # Start expanding more conservatively, adding paths the user may not
1746
 
        # have intended but required for consistent deltas.
1747
 
        self.doing_consistency_expansion = 1
1748
 
        if not self._pending_consistent_entries:
1749
 
            self._pending_consistent_entries = self._next_consistent_entries()
1750
 
        while self._pending_consistent_entries:
1751
 
            result, changed = self._pending_consistent_entries.pop()
1752
 
            if changed is not None:
1753
 
                return result
1754
1623
        raise StopIteration()
1755
1624
 
1756
1625
    cdef object _maybe_tree_ref(self, current_path_info):
1799
1668
                advance_entry = -1
1800
1669
                advance_path = -1
1801
1670
                result = None
1802
 
                changed = None
1803
1671
                path_handled = 0
1804
1672
                if current_entry is None:
1805
1673
                    # unversioned -  the check for path_handled when the path
1807
1675
                    pass
1808
1676
                elif current_path_info is None:
1809
1677
                    # no path is fine: the per entry code will handle it.
1810
 
                    result, changed = self._process_entry(current_entry,
1811
 
                        current_path_info)
 
1678
                    result = self._process_entry(current_entry, current_path_info)
 
1679
                    if result is not None:
 
1680
                        if result is self.uninteresting:
 
1681
                            result = None
1812
1682
                else:
1813
1683
                    minikind = _minikind_from_string(
1814
1684
                        current_entry[1][self.target_index][0])
1829
1699
                        else:
1830
1700
                            # entry referring to file not present on disk.
1831
1701
                            # advance the entry only, after processing.
1832
 
                            result, changed = self._process_entry(current_entry,
1833
 
                                None)
 
1702
                            result = self._process_entry(current_entry, None)
 
1703
                            if result is not None:
 
1704
                                if result is self.uninteresting:
 
1705
                                    result = None
1834
1706
                            advance_path = 0
1835
1707
                    else:
1836
1708
                        # paths are the same,and the dirstate entry is not
1837
1709
                        # absent or renamed.
1838
 
                        result, changed = self._process_entry(current_entry,
1839
 
                            current_path_info)
1840
 
                        if changed is not None:
 
1710
                        result = self._process_entry(current_entry, current_path_info)
 
1711
                        if result is not None:
1841
1712
                            path_handled = -1
1842
 
                            if not changed and not self.include_unchanged:
1843
 
                                changed = None
 
1713
                            if result is self.uninteresting:
 
1714
                                result = None
1844
1715
                # >- loop control starts here:
1845
1716
                # >- entry
1846
1717
                if advance_entry and current_entry is not None:
1859
1730
                                and stat.S_IEXEC & current_path_info[3].st_mode)
1860
1731
                            try:
1861
1732
                                relpath_unicode = self.utf8_decode(current_path_info[0])[0]
1862
 
                            except UnicodeDecodeError, _:
 
1733
                            except UnicodeDecodeError:
1863
1734
                                raise errors.BadFilenameEncoding(
1864
1735
                                    current_path_info[0], osutils._fs_enc)
1865
 
                            if changed is not None:
 
1736
                            if result is not None:
1866
1737
                                raise AssertionError(
1867
1738
                                    "result is not None: %r" % result)
1868
1739
                            result = (None,
1873
1744
                                (None, self.utf8_decode(current_path_info[1])[0]),
1874
1745
                                (None, current_path_info[2]),
1875
1746
                                (None, new_executable))
1876
 
                            changed = True
1877
1747
                        # dont descend into this unversioned path if it is
1878
1748
                        # a dir
1879
1749
                        if current_path_info[2] in ('directory'):
1892
1762
                                current_path_info)
1893
1763
                    else:
1894
1764
                        current_path_info = None
1895
 
                if changed is not None:
 
1765
                if result is not None:
1896
1766
                    # Found a result on this pass, yield it
1897
 
                    if changed:
1898
 
                        self._gather_result_for_consistency(result)
1899
 
                    if changed or self.include_unchanged:
1900
 
                        return result
 
1767
                    return result
1901
1768
            if self.current_block is not None:
1902
1769
                self.block_index = self.block_index + 1
1903
1770
                self._update_current_block()
1907
1774
                try:
1908
1775
                    self.current_dir_info = self.dir_iterator.next()
1909
1776
                    self.current_dir_list = self.current_dir_info[1]
1910
 
                except StopIteration, _:
 
1777
                except StopIteration:
1911
1778
                    self.current_dir_info = None
1912
 
 
1913
 
    cdef object _next_consistent_entries(self):
1914
 
        """Grabs the next specific file parent case to consider.
1915
 
        
1916
 
        :return: A list of the results, each of which is as for _process_entry.
1917
 
        """
1918
 
        results = []
1919
 
        while self.search_specific_file_parents:
1920
 
            # Process the parent directories for the paths we were iterating.
1921
 
            # Even in extremely large trees this should be modest, so currently
1922
 
            # no attempt is made to optimise.
1923
 
            path_utf8 = self.search_specific_file_parents.pop()
1924
 
            if path_utf8 in self.searched_exact_paths:
1925
 
                # We've examined this path.
1926
 
                continue
1927
 
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1928
 
                # We've examined this path.
1929
 
                continue
1930
 
            path_entries = self.state._entries_for_path(path_utf8)
1931
 
            # We need either one or two entries. If the path in
1932
 
            # self.target_index has moved (so the entry in source_index is in
1933
 
            # 'ar') then we need to also look for the entry for this path in
1934
 
            # self.source_index, to output the appropriate delete-or-rename.
1935
 
            selected_entries = []
1936
 
            found_item = False
1937
 
            for candidate_entry in path_entries:
1938
 
                # Find entries present in target at this path:
1939
 
                if candidate_entry[1][self.target_index][0] not in 'ar':
1940
 
                    found_item = True
1941
 
                    selected_entries.append(candidate_entry)
1942
 
                # Find entries present in source at this path:
1943
 
                elif (self.source_index is not None and
1944
 
                    candidate_entry[1][self.source_index][0] not in 'ar'):
1945
 
                    found_item = True
1946
 
                    if candidate_entry[1][self.target_index][0] == 'a':
1947
 
                        # Deleted, emit it here.
1948
 
                        selected_entries.append(candidate_entry)
1949
 
                    else:
1950
 
                        # renamed, emit it when we process the directory it
1951
 
                        # ended up at.
1952
 
                        self.search_specific_file_parents.add(
1953
 
                            candidate_entry[1][self.target_index][1])
1954
 
            if not found_item:
1955
 
                raise AssertionError(
1956
 
                    "Missing entry for specific path parent %r, %r" % (
1957
 
                    path_utf8, path_entries))
1958
 
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1959
 
            for entry in selected_entries:
1960
 
                if entry[0][2] in self.seen_ids:
1961
 
                    continue
1962
 
                result, changed = self._process_entry(entry, path_info)
1963
 
                if changed is None:
1964
 
                    raise AssertionError(
1965
 
                        "Got entry<->path mismatch for specific path "
1966
 
                        "%r entry %r path_info %r " % (
1967
 
                        path_utf8, entry, path_info))
1968
 
                # Only include changes - we're outside the users requested
1969
 
                # expansion.
1970
 
                if changed:
1971
 
                    self._gather_result_for_consistency(result)
1972
 
                    if (result[6][0] == 'directory' and
1973
 
                        result[6][1] != 'directory'):
1974
 
                        # This stopped being a directory, the old children have
1975
 
                        # to be included.
1976
 
                        if entry[1][self.source_index][0] == 'r':
1977
 
                            # renamed, take the source path
1978
 
                            entry_path_utf8 = entry[1][self.source_index][1]
1979
 
                        else:
1980
 
                            entry_path_utf8 = path_utf8
1981
 
                        initial_key = (entry_path_utf8, '', '')
1982
 
                        block_index, _ = self.state._find_block_index_from_key(
1983
 
                            initial_key)
1984
 
                        if block_index == 0:
1985
 
                            # The children of the root are in block index 1.
1986
 
                            block_index = block_index + 1
1987
 
                        current_block = None
1988
 
                        if block_index < len(self.state._dirblocks):
1989
 
                            current_block = self.state._dirblocks[block_index]
1990
 
                            if not osutils.is_inside(
1991
 
                                entry_path_utf8, current_block[0]):
1992
 
                                # No entries for this directory at all.
1993
 
                                current_block = None
1994
 
                        if current_block is not None:
1995
 
                            for entry in current_block[1]:
1996
 
                                if entry[1][self.source_index][0] in 'ar':
1997
 
                                    # Not in the source tree, so doesn't have to be
1998
 
                                    # included.
1999
 
                                    continue
2000
 
                                # Path of the entry itself.
2001
 
                                self.search_specific_file_parents.add(
2002
 
                                    self.pathjoin(*entry[0][:2]))
2003
 
                if changed or self.include_unchanged:
2004
 
                    results.append((result, changed))
2005
 
            self.searched_exact_paths.add(path_utf8)
2006
 
        return results
2007
 
 
2008
 
    cdef object _path_info(self, utf8_path, unicode_path):
2009
 
        """Generate path_info for unicode_path.
2010
 
 
2011
 
        :return: None if unicode_path does not exist, or a path_info tuple.
2012
 
        """
2013
 
        abspath = self.tree.abspath(unicode_path)
2014
 
        try:
2015
 
            stat = os.lstat(abspath)
2016
 
        except OSError, e:
2017
 
            if e.errno == errno.ENOENT:
2018
 
                # the path does not exist.
2019
 
                return None
2020
 
            else:
2021
 
                raise
2022
 
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
2023
 
        dir_info = (utf8_path, utf8_basename,
2024
 
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
2025
 
            abspath)
2026
 
        if dir_info[2] == 'directory':
2027
 
            if self.tree._directory_is_tree_reference(
2028
 
                unicode_path):
2029
 
                self.root_dir_info = self.root_dir_info[:2] + \
2030
 
                    ('tree-reference',) + self.root_dir_info[3:]
2031
 
        return dir_info