288
294
if not PyString_CheckExact(path2):
289
295
raise TypeError("'path2' must be a plain string, not %s: %r"
290
296
% (type(path2), path2))
291
return _cmp_path_by_dirblock(PyString_AsString(path1),
292
PyString_Size(path1),
293
PyString_AsString(path2),
294
PyString_Size(path2))
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
char *path2, int path2_len):
297
return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
298
PyString_Size(path1),
299
PyString_AsString(path2),
300
PyString_Size(path2))
303
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
304
char *path2, int path2_len): # cannot_raise
299
305
"""Compare two paths by what directory they are in.
301
see ``_cmp_path_by_dirblock_c`` for details.
307
see ``_cmp_path_by_dirblock`` for details.
303
309
cdef char *dirname1
304
310
cdef int dirname1_len
648
657
# Build up the key that will be used.
649
658
# By using <object>(void *) Pyrex will automatically handle the
650
659
# Py_INCREF that we need.
651
path_name_file_id_key = (<object>p_current_dirname[0],
660
cur_dirname = <object>p_current_dirname[0]
661
# Use StaticTuple_New to pre-allocate, rather than creating a regular
662
# tuple and passing it to the StaticTuple constructor.
663
# path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
664
# self.get_next_str(),
665
# self.get_next_str(),
667
tmp = StaticTuple_New(3)
668
Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
669
cur_basename = self.get_next_str()
670
cur_file_id = self.get_next_str()
671
Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
672
Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
673
path_name_file_id_key = tmp
656
675
# Parse all of the per-tree information. current has the information in
657
676
# the same location as parent trees. The only difference is that 'info'
675
694
executable_cstr = self.get_next(&cur_size)
676
695
is_executable = (executable_cstr[0] == c'y')
677
696
info = self.get_next_str()
678
PyList_Append(trees, (
697
# TODO: If we want to use StaticTuple_New here we need to be pretty
698
# careful. We are relying on a bit of Pyrex
699
# automatic-conversion from 'int' to PyInt, and that doesn't
700
# play well with the StaticTuple_SET_ITEM macro.
701
# Timing doesn't (yet) show a worthwhile improvement in speed
702
# versus complexity and maintainability.
703
# tmp = StaticTuple_New(5)
704
# Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
705
# Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
706
# Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
707
# Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
708
# Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
709
# PyList_Append(trees, tmp)
710
PyList_Append(trees, StaticTuple(
679
711
minikind, # minikind
680
712
fingerprint, # fingerprint
681
713
entry_size, # size
790
822
cdef char result[6*4] # 6 long ints
791
823
cdef int *aliased
792
824
aliased = <int *>result
793
aliased[0] = htonl(stat_value.st_size)
794
aliased[1] = htonl(int(stat_value.st_mtime))
795
aliased[2] = htonl(int(stat_value.st_ctime))
796
aliased[3] = htonl(stat_value.st_dev)
797
aliased[4] = htonl(stat_value.st_ino & 0xFFFFFFFF)
798
aliased[5] = htonl(stat_value.st_mode)
825
aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size))
826
# mtime and ctime will often be floats but get converted to PyInt within
827
aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime))
828
aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime))
829
aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev))
830
aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino))
831
aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode))
799
832
packed = PyString_FromStringAndSize(result, 6*4)
800
833
return _encode(packed)[:-1]
885
920
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
886
921
executable, packed_stat)
888
entry[1][0] = ('f', '', stat_value.st_size,
889
executable, DirState.NULLSTAT)
923
# This file is not worth caching the sha1. Either it is too new, or
924
# it is newly added. Regardless, the only things we are changing
925
# are derived from the stat, and so are not worth caching. So we do
926
# *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
927
# updated values if there is *other* data worth saving.)
928
entry[1][0] = ('f', '', stat_value.st_size, executable,
890
931
elif minikind == c'd':
892
932
entry[1][0] = ('d', '', 0, False, packed_stat)
893
933
if saved_minikind != c'd':
894
934
# This changed from something into a directory. Make sure we
898
938
self._get_block_entry_index(entry[0][0], entry[0][1], 0)
899
939
self._ensure_block(block_index, entry_index,
900
940
pathjoin(entry[0][0], entry[0][1]))
942
# Any changes are derived trivially from the stat object, not worth
943
# re-writing a dirstate for just this
901
945
elif minikind == c'l':
946
if saved_minikind == c'l':
947
# If the object hasn't changed kind, it isn't worth saving the
948
# dirstate just for a symlink. The default is 'fast symlinks' which
949
# save the target in the inode entry, rather than separately. So to
950
# stat, we've already read everything off disk.
902
952
link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
903
953
if self._cutoff_time is None:
904
954
self._sha_cutoff_time()
962
1016
cdef class ProcessEntryC:
1018
cdef int doing_consistency_expansion
964
1019
cdef object old_dirname_to_file_id # dict
965
1020
cdef object new_dirname_to_file_id # dict
966
cdef readonly object uninteresting
967
1021
cdef object last_source_parent
968
1022
cdef object last_target_parent
969
cdef object include_unchanged
1023
cdef int include_unchanged
970
1025
cdef object use_filesystem_for_exec
971
1026
cdef object utf8_decode
972
1027
cdef readonly object searched_specific_files
1028
cdef readonly object searched_exact_paths
973
1029
cdef object search_specific_files
1030
# The parents up to the root of the paths we are searching.
1031
# After all normal paths are returned, these specific items are returned.
1032
cdef object search_specific_file_parents
974
1033
cdef object state
975
1034
# Current iteration variables:
976
1035
cdef object current_root
988
1047
cdef object current_block_list
989
1048
cdef object current_dir_info
990
1049
cdef object current_dir_list
1050
cdef object _pending_consistent_entries # list
991
1051
cdef int path_index
992
1052
cdef object root_dir_info
993
1053
cdef object bisect_left
994
1054
cdef object pathjoin
995
1055
cdef object fstat
1056
# A set of the ids we've output when doing partial output.
1057
cdef object seen_ids
996
1058
cdef object sha_file
998
1060
def __init__(self, include_unchanged, use_filesystem_for_exec,
999
1061
search_specific_files, state, source_index, target_index,
1000
1062
want_unversioned, tree):
1063
self.doing_consistency_expansion = 0
1001
1064
self.old_dirname_to_file_id = {}
1002
1065
self.new_dirname_to_file_id = {}
1003
# Just a sentry, so that _process_entry can say that this
1004
# record is handled, but isn't interesting to process (unchanged)
1005
self.uninteresting = object()
1066
# Are we doing a partial iter_changes?
1067
self.partial = set(['']).__ne__(search_specific_files)
1006
1068
# Using a list so that we can access the values and change them in
1007
1069
# nested scope. Each one is [path, file_id, entry]
1008
1070
self.last_source_parent = [None, None]
1009
1071
self.last_target_parent = [None, None]
1010
self.include_unchanged = include_unchanged
1072
if include_unchanged is None:
1073
self.include_unchanged = False
1075
self.include_unchanged = int(include_unchanged)
1011
1076
self.use_filesystem_for_exec = use_filesystem_for_exec
1012
1077
self.utf8_decode = cache_utf8._utf8_decode
1013
1078
# for all search_indexs in each path at or under each element of
1014
# search_specific_files, if the detail is relocated: add the id, and add the
1015
# relocated path as one to search if its not searched already. If the
1016
# detail is not relocated, add the id.
1079
# search_specific_files, if the detail is relocated: add the id, and
1080
# add the relocated path as one to search if it's not searched already.
1081
# If the detail is not relocated, add the id.
1017
1082
self.searched_specific_files = set()
1083
# When we search exact paths without expanding downwards, we record
1085
self.searched_exact_paths = set()
1018
1086
self.search_specific_files = search_specific_files
1087
# The parents up to the root of the paths we are searching.
1088
# After all normal paths are returned, these specific items are returned.
1089
self.search_specific_file_parents = set()
1090
# The ids we've sent out in the delta.
1091
self.seen_ids = set()
1019
1092
self.state = state
1020
1093
self.current_root = None
1021
1094
self.current_root_unicode = None
1037
1110
self.current_block_pos = -1
1038
1111
self.current_dir_info = None
1039
1112
self.current_dir_list = None
1113
self._pending_consistent_entries = []
1040
1114
self.path_index = 0
1041
1115
self.root_dir_info = None
1042
1116
self.bisect_left = bisect.bisect_left
1043
1117
self.pathjoin = osutils.pathjoin
1044
1118
self.fstat = os.fstat
1045
1119
self.sha_file = osutils.sha_file
1120
if target_index != 0:
1121
# A lot of code in here depends on target_index == 0
1122
raise errors.BzrError('unsupported target index')
1047
1124
cdef _process_entry(self, entry, path_info):
1048
1125
"""Compare an entry and real disk to generate delta information.
1050
1127
:param path_info: top_relpath, basename, kind, lstat, abspath for
1051
the path of entry. If None, then the path is considered absent.
1052
(Perhaps we should pass in a concrete entry for this ?)
1128
the path of entry. If None, then the path is considered absent in
1129
the target (Perhaps we should pass in a concrete entry for this ?)
1053
1130
Basename is returned as a utf8 string because we expect this
1054
1131
tuple will be ignored, and don't want to take the time to
1056
:return: None if the these don't match
1057
A tuple of information about the change, or
1058
the object 'uninteresting' if these match, but are
1059
basically identical.
1133
:return: (iter_changes_result, changed). If the entry has not been
1134
handled then changed is None. Otherwise it is False if no content
1135
or metadata changes have occurred, and True if any content or
1136
metadata change has occurred. If self.include_unchanged is True then
1137
if changed is not None, iter_changes_result will always be a result
1138
tuple. Otherwise, iter_changes_result is None unless changed is
1061
1141
cdef char target_minikind
1062
1142
cdef char source_minikind
1140
1223
if source_minikind != c'f':
1141
1224
content_change = 1
1143
# If the size is the same, check the sha:
1144
if target_details[2] == source_details[2]:
1145
if link_or_sha1 is None:
1147
statvalue, link_or_sha1 = \
1148
self.state._sha1_provider.stat_and_sha1(
1150
self.state._observed_sha1(entry, link_or_sha1,
1152
content_change = (link_or_sha1 != source_details[1])
1154
# Size changed, so must be different
1226
# Check the sha. We can't just rely on the size as
1227
# content filtering may mean different sizes actually
1228
# map to the same content
1229
if link_or_sha1 is None:
1231
statvalue, link_or_sha1 = \
1232
self.state._sha1_provider.stat_and_sha1(
1234
self.state._observed_sha1(entry, link_or_sha1,
1236
content_change = (link_or_sha1 != source_details[1])
1156
1237
# Target details is updated at update_entry time
1157
1238
if self.use_filesystem_for_exec:
1158
1239
# We don't need S_ISREG here, because we are sure
1308
1393
(parent_id, None),
1309
1394
(self.utf8_decode(entry[0][1])[0], None),
1310
1395
(_minikind_to_kind(source_minikind), None),
1311
(source_details[3], None))
1396
(source_details[3], None)), True
1312
1397
elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1313
1398
# a rename; could be a true rename, or a rename inherited from
1314
1399
# a renamed parent. TODO: handle this efficiently. It's not
1315
1400
# common case to rename dirs though, so a correct but slow
1316
1401
# implementation will do.
1317
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1402
if (not self.doing_consistency_expansion and
1403
not osutils.is_inside_any(self.searched_specific_files,
1404
target_details[1])):
1318
1405
self.search_specific_files.add(target_details[1])
1406
# We don't expand the specific files parents list here as
1407
# the path is absent in target and won't create a delta with
1319
1409
elif ((source_minikind == c'r' or source_minikind == c'a') and
1320
1410
(target_minikind == c'r' or target_minikind == c'a')):
1321
1411
# neither of the selected trees contain this path,
1335
1425
def iter_changes(self):
1338
cdef void _update_current_block(self):
1428
cdef int _gather_result_for_consistency(self, result) except -1:
1429
"""Check a result we will yield to make sure we are consistent later.
1431
This gathers result's parents into a set to output later.
1433
:param result: A result tuple.
1435
if not self.partial or not result[0]:
1437
self.seen_ids.add(result[0])
1438
new_path = result[1][1]
1440
# Not the root and not a delete: queue up the parents of the path.
1441
self.search_specific_file_parents.update(
1442
osutils.parent_directories(new_path.encode('utf8')))
1443
# Add the root directory which parent_directories does not
1445
self.search_specific_file_parents.add('')
1448
cdef int _update_current_block(self) except -1:
1339
1449
if (self.block_index < len(self.state._dirblocks) and
1340
1450
osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1341
1451
self.current_block = self.state._dirblocks[self.block_index]
1401
1512
cdef char * current_dirname_c, * current_blockname_c
1402
1513
cdef int advance_entry, advance_path
1403
1514
cdef int path_handled
1404
uninteresting = self.uninteresting
1405
1515
searched_specific_files = self.searched_specific_files
1406
1516
# Are we walking a root?
1407
1517
while self.root_entries_pos < self.root_entries_len:
1408
1518
entry = self.root_entries[self.root_entries_pos]
1409
1519
self.root_entries_pos = self.root_entries_pos + 1
1410
result = self._process_entry(entry, self.root_dir_info)
1411
if result is not None and result is not self.uninteresting:
1520
result, changed = self._process_entry(entry, self.root_dir_info)
1521
if changed is not None:
1523
self._gather_result_for_consistency(result)
1524
if changed or self.include_unchanged:
1413
1526
# Have we finished the prior root, or never started one ?
1414
1527
if self.current_root is None:
1415
1528
# TODO: the pending list should be lexically sorted? the
1416
1529
# interface doesn't require it.
1418
1531
self.current_root = self.search_specific_files.pop()
1420
1533
raise StopIteration()
1421
self.current_root_unicode = self.current_root.decode('utf8')
1422
1534
self.searched_specific_files.add(self.current_root)
1423
1535
# process the entries for this containing directory: the rest will be
1424
1536
# found by their parents recursively.
1425
1537
self.root_entries = self.state._entries_for_path(self.current_root)
1426
1538
self.root_entries_len = len(self.root_entries)
1539
self.current_root_unicode = self.current_root.decode('utf8')
1427
1540
self.root_abspath = self.tree.abspath(self.current_root_unicode)
1429
1542
root_stat = os.lstat(self.root_abspath)
1775
1904
self.current_dir_info = self.dir_iterator.next()
1776
1905
self.current_dir_list = self.current_dir_info[1]
1777
except StopIteration:
1906
except StopIteration, _:
1778
1907
self.current_dir_info = None
1909
cdef object _next_consistent_entries(self):
1910
"""Grabs the next specific file parent case to consider.
1912
:return: A list of the results, each of which is as for _process_entry.
1915
while self.search_specific_file_parents:
1916
# Process the parent directories for the paths we were iterating.
1917
# Even in extremely large trees this should be modest, so currently
1918
# no attempt is made to optimise.
1919
path_utf8 = self.search_specific_file_parents.pop()
1920
if path_utf8 in self.searched_exact_paths:
1921
# We've examined this path.
1923
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1924
# We've examined this path.
1926
path_entries = self.state._entries_for_path(path_utf8)
1927
# We need either one or two entries. If the path in
1928
# self.target_index has moved (so the entry in source_index is in
1929
# 'ar') then we need to also look for the entry for this path in
1930
# self.source_index, to output the appropriate delete-or-rename.
1931
selected_entries = []
1933
for candidate_entry in path_entries:
1934
# Find entries present in target at this path:
1935
if candidate_entry[1][self.target_index][0] not in 'ar':
1937
selected_entries.append(candidate_entry)
1938
# Find entries present in source at this path:
1939
elif (self.source_index is not None and
1940
candidate_entry[1][self.source_index][0] not in 'ar'):
1942
if candidate_entry[1][self.target_index][0] == 'a':
1943
# Deleted, emit it here.
1944
selected_entries.append(candidate_entry)
1946
# renamed, emit it when we process the directory it
1948
self.search_specific_file_parents.add(
1949
candidate_entry[1][self.target_index][1])
1951
raise AssertionError(
1952
"Missing entry for specific path parent %r, %r" % (
1953
path_utf8, path_entries))
1954
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1955
for entry in selected_entries:
1956
if entry[0][2] in self.seen_ids:
1958
result, changed = self._process_entry(entry, path_info)
1960
raise AssertionError(
1961
"Got entry<->path mismatch for specific path "
1962
"%r entry %r path_info %r " % (
1963
path_utf8, entry, path_info))
1964
# Only include changes - we're outside the user's requested
1967
self._gather_result_for_consistency(result)
1968
if (result[6][0] == 'directory' and
1969
result[6][1] != 'directory'):
1970
# This stopped being a directory, the old children have
1972
if entry[1][self.source_index][0] == 'r':
1973
# renamed, take the source path
1974
entry_path_utf8 = entry[1][self.source_index][1]
1976
entry_path_utf8 = path_utf8
1977
initial_key = (entry_path_utf8, '', '')
1978
block_index, _ = self.state._find_block_index_from_key(
1980
if block_index == 0:
1981
# The children of the root are in block index 1.
1982
block_index = block_index + 1
1983
current_block = None
1984
if block_index < len(self.state._dirblocks):
1985
current_block = self.state._dirblocks[block_index]
1986
if not osutils.is_inside(
1987
entry_path_utf8, current_block[0]):
1988
# No entries for this directory at all.
1989
current_block = None
1990
if current_block is not None:
1991
for entry in current_block[1]:
1992
if entry[1][self.source_index][0] in 'ar':
1993
# Not in the source tree, so doesn't have to be
1996
# Path of the entry itself.
1997
self.search_specific_file_parents.add(
1998
self.pathjoin(*entry[0][:2]))
1999
if changed or self.include_unchanged:
2000
results.append((result, changed))
2001
self.searched_exact_paths.add(path_utf8)
2004
cdef object _path_info(self, utf8_path, unicode_path):
2005
"""Generate path_info for unicode_path.
2007
:return: None if unicode_path does not exist, or a path_info tuple.
2009
abspath = self.tree.abspath(unicode_path)
2011
stat = os.lstat(abspath)
2013
if e.errno == errno.ENOENT:
2014
# the path does not exist.
2018
utf8_basename = utf8_path.rsplit('/', 1)[-1]
2019
dir_info = (utf8_path, utf8_basename,
2020
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
2022
if dir_info[2] == 'directory':
2023
if self.tree._directory_is_tree_reference(
2025
self.root_dir_info = self.root_dir_info[:2] + \
2026
('tree-reference',) + self.root_dir_info[3:]