288
293
if not PyString_CheckExact(path2):
289
294
raise TypeError("'path2' must be a plain string, not %s: %r"
290
295
% (type(path2), path2))
291
return _cmp_path_by_dirblock(PyString_AsString(path1),
292
PyString_Size(path1),
293
PyString_AsString(path2),
294
PyString_Size(path2))
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
char *path2, int path2_len):
296
return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
297
PyString_Size(path1),
298
PyString_AsString(path2),
299
PyString_Size(path2))
302
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
303
char *path2, int path2_len): # cannot_raise
299
304
"""Compare two paths by what directory they are in.
301
see ``_cmp_path_by_dirblock_c`` for details.
306
see ``_cmp_path_by_dirblock`` for details.
303
308
cdef char *dirname1
304
309
cdef int dirname1_len
648
656
# Build up the key that will be used.
649
657
# By using <object>(void *) Pyrex will automatically handle the
650
658
# Py_INCREF that we need.
651
path_name_file_id_key = (<object>p_current_dirname[0],
659
cur_dirname = <object>p_current_dirname[0]
660
# Use StaticTuple_New to pre-allocate, rather than creating a regular
661
# tuple and passing it to the StaticTuple constructor.
662
# path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
663
# self.get_next_str(),
664
# self.get_next_str(),
666
tmp = StaticTuple_New(3)
667
Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
668
cur_basename = self.get_next_str()
669
cur_file_id = self.get_next_str()
670
Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
671
Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
672
path_name_file_id_key = tmp
656
674
# Parse all of the per-tree information. current has the information in
657
675
# the same location as parent trees. The only difference is that 'info'
675
693
executable_cstr = self.get_next(&cur_size)
676
694
is_executable = (executable_cstr[0] == c'y')
677
695
info = self.get_next_str()
678
PyList_Append(trees, (
696
# TODO: If we want to use StaticTuple_New here we need to be pretty
697
# careful. We are relying on a bit of Pyrex
698
# automatic-conversion from 'int' to PyInt, and that doesn't
699
# play well with the StaticTuple_SET_ITEM macro.
700
# Timing doesn't (yet) show a worthwhile improvement in speed
701
# versus complexity and maintainability.
702
# tmp = StaticTuple_New(5)
703
# Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
704
# Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
705
# Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
706
# Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
707
# Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
708
# PyList_Append(trees, tmp)
709
PyList_Append(trees, StaticTuple(
679
710
minikind, # minikind
680
711
fingerprint, # fingerprint
681
712
entry_size, # size
883
918
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
884
919
executable, packed_stat)
886
entry[1][0] = ('f', '', stat_value.st_size,
887
executable, DirState.NULLSTAT)
921
# This file is not worth caching the sha1. Either it is too new, or
922
# it is newly added. Regardless, the only things we are changing
923
# are derived from the stat, and so are not worth caching. So we do
924
# *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
925
# updated values if there is *other* data worth saving.)
926
entry[1][0] = ('f', '', stat_value.st_size, executable,
888
929
elif minikind == c'd':
890
930
entry[1][0] = ('d', '', 0, False, packed_stat)
891
931
if saved_minikind != c'd':
892
932
# This changed from something into a directory. Make sure we
896
936
self._get_block_entry_index(entry[0][0], entry[0][1], 0)
897
937
self._ensure_block(block_index, entry_index,
898
938
pathjoin(entry[0][0], entry[0][1]))
940
# Any changes are derived trivially from the stat object, not worth
941
# re-writing a dirstate for just this
899
943
elif minikind == c'l':
944
if saved_minikind == c'l':
945
# If the object hasn't changed kind, it isn't worth saving the
946
# dirstate just for a symlink. The default is 'fast symlinks' which
947
# save the target in the inode entry, rather than separately. So to
948
# stat, we've already read everything off disk.
900
950
link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
901
951
if self._cutoff_time is None:
902
952
self._sha_cutoff_time()
960
1014
cdef class ProcessEntryC:
1016
cdef int doing_consistency_expansion
962
1017
cdef object old_dirname_to_file_id # dict
963
1018
cdef object new_dirname_to_file_id # dict
964
cdef readonly object uninteresting
965
1019
cdef object last_source_parent
966
1020
cdef object last_target_parent
967
cdef object include_unchanged
1021
cdef int include_unchanged
968
1023
cdef object use_filesystem_for_exec
969
1024
cdef object utf8_decode
970
1025
cdef readonly object searched_specific_files
1026
cdef readonly object searched_exact_paths
971
1027
cdef object search_specific_files
1028
# The parents up to the root of the paths we are searching.
1029
# After all normal paths are returned, these specific items are returned.
1030
cdef object search_specific_file_parents
972
1031
cdef object state
973
1032
# Current iteration variables:
974
1033
cdef object current_root
986
1045
cdef object current_block_list
987
1046
cdef object current_dir_info
988
1047
cdef object current_dir_list
1048
cdef object _pending_consistent_entries # list
989
1049
cdef int path_index
990
1050
cdef object root_dir_info
991
1051
cdef object bisect_left
992
1052
cdef object pathjoin
993
1053
cdef object fstat
1054
# A set of the ids we've output when doing partial output.
1055
cdef object seen_ids
994
1056
cdef object sha_file
996
1058
def __init__(self, include_unchanged, use_filesystem_for_exec,
997
1059
search_specific_files, state, source_index, target_index,
998
1060
want_unversioned, tree):
1061
self.doing_consistency_expansion = 0
999
1062
self.old_dirname_to_file_id = {}
1000
1063
self.new_dirname_to_file_id = {}
1001
# Just a sentry, so that _process_entry can say that this
1002
# record is handled, but isn't interesting to process (unchanged)
1003
self.uninteresting = object()
1064
# Are we doing a partial iter_changes?
1065
self.partial = set(['']).__ne__(search_specific_files)
1004
1066
# Using a list so that we can access the values and change them in
1005
1067
# nested scope. Each one is [path, file_id, entry]
1006
1068
self.last_source_parent = [None, None]
1007
1069
self.last_target_parent = [None, None]
1008
self.include_unchanged = include_unchanged
1070
if include_unchanged is None:
1071
self.include_unchanged = False
1073
self.include_unchanged = int(include_unchanged)
1009
1074
self.use_filesystem_for_exec = use_filesystem_for_exec
1010
1075
self.utf8_decode = cache_utf8._utf8_decode
1011
1076
# for all search_indexs in each path at or under each element of
1012
# search_specific_files, if the detail is relocated: add the id, and add the
1013
# relocated path as one to search if its not searched already. If the
1014
# detail is not relocated, add the id.
1077
# search_specific_files, if the detail is relocated: add the id, and
1078
# add the relocated path as one to search if it's not searched already.
1079
# If the detail is not relocated, add the id.
1015
1080
self.searched_specific_files = set()
1081
# When we search exact paths without expanding downwards, we record
1083
self.searched_exact_paths = set()
1016
1084
self.search_specific_files = search_specific_files
1085
# The parents up to the root of the paths we are searching.
1086
# After all normal paths are returned, these specific items are returned.
1087
self.search_specific_file_parents = set()
1088
# The ids we've sent out in the delta.
1089
self.seen_ids = set()
1017
1090
self.state = state
1018
1091
self.current_root = None
1019
1092
self.current_root_unicode = None
1035
1108
self.current_block_pos = -1
1036
1109
self.current_dir_info = None
1037
1110
self.current_dir_list = None
1111
self._pending_consistent_entries = []
1038
1112
self.path_index = 0
1039
1113
self.root_dir_info = None
1040
1114
self.bisect_left = bisect.bisect_left
1041
1115
self.pathjoin = osutils.pathjoin
1042
1116
self.fstat = os.fstat
1043
1117
self.sha_file = osutils.sha_file
1118
if target_index != 0:
1119
# A lot of code in here depends on target_index == 0
1120
raise errors.BzrError('unsupported target index')
1045
1122
cdef _process_entry(self, entry, path_info):
1046
1123
"""Compare an entry and real disk to generate delta information.
1048
1125
:param path_info: top_relpath, basename, kind, lstat, abspath for
1049
the path of entry. If None, then the path is considered absent.
1050
(Perhaps we should pass in a concrete entry for this ?)
1126
the path of entry. If None, then the path is considered absent in
1127
the target (Perhaps we should pass in a concrete entry for this ?)
1051
1128
Basename is returned as a utf8 string because we expect this
1052
1129
tuple will be ignored, and don't want to take the time to
1054
:return: None if these don't match
1055
A tuple of information about the change, or
1056
the object 'uninteresting' if these match, but are
1057
basically identical.
1131
:return: (iter_changes_result, changed). If the entry has not been
1132
handled then changed is None. Otherwise it is False if no content
1133
or metadata changes have occurred, and True if any content or
1134
metadata change has occurred. If self.include_unchanged is True then
1135
if changed is not None, iter_changes_result will always be a result
1136
tuple. Otherwise, iter_changes_result is None unless changed is
1059
1139
cdef char target_minikind
1060
1140
cdef char source_minikind
1138
1221
if source_minikind != c'f':
1139
1222
content_change = 1
1141
# If the size is the same, check the sha:
1142
if target_details[2] == source_details[2]:
1143
if link_or_sha1 is None:
1145
file_obj = file(path_info[4], 'rb')
1147
# XXX: TODO: Use lower level file IO rather
1148
# than python objects for sha-misses.
1149
statvalue = self.fstat(file_obj.fileno())
1150
link_or_sha1 = self.sha_file(file_obj)
1153
self.state._observed_sha1(entry, link_or_sha1,
1155
content_change = (link_or_sha1 != source_details[1])
1157
# Size changed, so must be different
1224
# Check the sha. We can't just rely on the size as
1225
# content filtering may mean different sizes actually
1226
# map to the same content
1227
if link_or_sha1 is None:
1229
statvalue, link_or_sha1 = \
1230
self.state._sha1_provider.stat_and_sha1(
1232
self.state._observed_sha1(entry, link_or_sha1,
1234
content_change = (link_or_sha1 != source_details[1])
1159
1235
# Target details is updated at update_entry time
1160
1236
if self.use_filesystem_for_exec:
1161
1237
# We don't need S_ISREG here, because we are sure
1248
1330
(source_parent_id, target_parent_id),
1249
1331
(self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1250
1332
(source_kind, target_kind),
1251
(source_exec, target_exec))
1253
return self.uninteresting
1333
(source_exec, target_exec)), changed
1254
1334
elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1255
1335
# looks like a new file
1256
1336
path = self.pathjoin(entry[0][0], entry[0][1])
1257
1337
# parent id is the entry for the path in the target tree
1258
1338
# TODO: these are the same for an entire directory: cache em.
1259
parent_id = self.state._get_entry(self.target_index,
1260
path_utf8=entry[0][0])[0][2]
1339
parent_entry = self.state._get_entry(self.target_index,
1340
path_utf8=entry[0][0])
1341
if parent_entry is None:
1342
raise errors.DirstateCorrupt(self.state,
1343
"We could not find the parent entry in index %d"
1344
" for the entry: %s"
1345
% (self.target_index, entry[0]))
1346
parent_id = parent_entry[0][2]
1261
1347
if parent_id == entry[0][2]:
1262
1348
parent_id = None
1263
1349
if path_info is not None:
1305
1391
(parent_id, None),
1306
1392
(self.utf8_decode(entry[0][1])[0], None),
1307
1393
(_minikind_to_kind(source_minikind), None),
1308
(source_details[3], None))
1394
(source_details[3], None)), True
1309
1395
elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1310
1396
# a rename; could be a true rename, or a rename inherited from
1311
1397
# a renamed parent. TODO: handle this efficiently. It's not
1312
1398
# common case to rename dirs though, so a correct but slow
1313
1399
# implementation will do.
1314
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1400
if (not self.doing_consistency_expansion and
1401
not osutils.is_inside_any(self.searched_specific_files,
1402
target_details[1])):
1315
1403
self.search_specific_files.add(target_details[1])
1404
# We don't expand the specific files parents list here as
1405
# the path is absent in target and won't create a delta with
1316
1407
elif ((source_minikind == c'r' or source_minikind == c'a') and
1317
1408
(target_minikind == c'r' or target_minikind == c'a')):
1318
1409
# neither of the selected trees contain this path,
1332
1423
def iter_changes(self):
1335
cdef void _update_current_block(self):
1426
cdef int _gather_result_for_consistency(self, result) except -1:
1427
"""Check a result we will yield to make sure we are consistent later.
1429
This gathers result's parents into a set to output later.
1431
:param result: A result tuple.
1433
if not self.partial or not result[0]:
1435
self.seen_ids.add(result[0])
1436
new_path = result[1][1]
1438
# Not the root and not a delete: queue up the parents of the path.
1439
self.search_specific_file_parents.update(
1440
osutils.parent_directories(new_path.encode('utf8')))
1441
# Add the root directory which parent_directories does not
1443
self.search_specific_file_parents.add('')
1446
cdef int _update_current_block(self) except -1:
1336
1447
if (self.block_index < len(self.state._dirblocks) and
1337
1448
osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1338
1449
self.current_block = self.state._dirblocks[self.block_index]
1398
1510
cdef char * current_dirname_c, * current_blockname_c
1399
1511
cdef int advance_entry, advance_path
1400
1512
cdef int path_handled
1401
uninteresting = self.uninteresting
1402
1513
searched_specific_files = self.searched_specific_files
1403
1514
# Are we walking a root?
1404
1515
while self.root_entries_pos < self.root_entries_len:
1405
1516
entry = self.root_entries[self.root_entries_pos]
1406
1517
self.root_entries_pos = self.root_entries_pos + 1
1407
result = self._process_entry(entry, self.root_dir_info)
1408
if result is not None and result is not self.uninteresting:
1518
result, changed = self._process_entry(entry, self.root_dir_info)
1519
if changed is not None:
1521
self._gather_result_for_consistency(result)
1522
if changed or self.include_unchanged:
1410
1524
# Have we finished the prior root, or never started one ?
1411
1525
if self.current_root is None:
1412
1526
# TODO: the pending list should be lexically sorted? the
1413
1527
# interface doesn't require it.
1415
1529
self.current_root = self.search_specific_files.pop()
1417
1531
raise StopIteration()
1418
self.current_root_unicode = self.current_root.decode('utf8')
1419
1532
self.searched_specific_files.add(self.current_root)
1420
1533
# process the entries for this containing directory: the rest will be
1421
1534
# found by their parents recursively.
1422
1535
self.root_entries = self.state._entries_for_path(self.current_root)
1423
1536
self.root_entries_len = len(self.root_entries)
1537
self.current_root_unicode = self.current_root.decode('utf8')
1424
1538
self.root_abspath = self.tree.abspath(self.current_root_unicode)
1426
1540
root_stat = os.lstat(self.root_abspath)
1772
1902
self.current_dir_info = self.dir_iterator.next()
1773
1903
self.current_dir_list = self.current_dir_info[1]
1774
except StopIteration:
1904
except StopIteration, _:
1775
1905
self.current_dir_info = None
1907
cdef object _next_consistent_entries(self):
1908
"""Grabs the next specific file parent case to consider.
1910
:return: A list of the results, each of which is as for _process_entry.
1913
while self.search_specific_file_parents:
1914
# Process the parent directories for the paths we were iterating.
1915
# Even in extremely large trees this should be modest, so currently
1916
# no attempt is made to optimise.
1917
path_utf8 = self.search_specific_file_parents.pop()
1918
if path_utf8 in self.searched_exact_paths:
1919
# We've examined this path.
1921
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1922
# We've examined this path.
1924
path_entries = self.state._entries_for_path(path_utf8)
1925
# We need either one or two entries. If the path in
1926
# self.target_index has moved (so the entry in source_index is in
1927
# 'ar') then we need to also look for the entry for this path in
1928
# self.source_index, to output the appropriate delete-or-rename.
1929
selected_entries = []
1931
for candidate_entry in path_entries:
1932
# Find entries present in target at this path:
1933
if candidate_entry[1][self.target_index][0] not in 'ar':
1935
selected_entries.append(candidate_entry)
1936
# Find entries present in source at this path:
1937
elif (self.source_index is not None and
1938
candidate_entry[1][self.source_index][0] not in 'ar'):
1940
if candidate_entry[1][self.target_index][0] == 'a':
1941
# Deleted, emit it here.
1942
selected_entries.append(candidate_entry)
1944
# renamed, emit it when we process the directory it
1946
self.search_specific_file_parents.add(
1947
candidate_entry[1][self.target_index][1])
1949
raise AssertionError(
1950
"Missing entry for specific path parent %r, %r" % (
1951
path_utf8, path_entries))
1952
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1953
for entry in selected_entries:
1954
if entry[0][2] in self.seen_ids:
1956
result, changed = self._process_entry(entry, path_info)
1958
raise AssertionError(
1959
"Got entry<->path mismatch for specific path "
1960
"%r entry %r path_info %r " % (
1961
path_utf8, entry, path_info))
1962
# Only include changes - we're outside the user's requested
1965
self._gather_result_for_consistency(result)
1966
if (result[6][0] == 'directory' and
1967
result[6][1] != 'directory'):
1968
# This stopped being a directory, the old children have
1970
if entry[1][self.source_index][0] == 'r':
1971
# renamed, take the source path
1972
entry_path_utf8 = entry[1][self.source_index][1]
1974
entry_path_utf8 = path_utf8
1975
initial_key = (entry_path_utf8, '', '')
1976
block_index, _ = self.state._find_block_index_from_key(
1978
if block_index == 0:
1979
# The children of the root are in block index 1.
1980
block_index = block_index + 1
1981
current_block = None
1982
if block_index < len(self.state._dirblocks):
1983
current_block = self.state._dirblocks[block_index]
1984
if not osutils.is_inside(
1985
entry_path_utf8, current_block[0]):
1986
# No entries for this directory at all.
1987
current_block = None
1988
if current_block is not None:
1989
for entry in current_block[1]:
1990
if entry[1][self.source_index][0] in 'ar':
1991
# Not in the source tree, so doesn't have to be
1994
# Path of the entry itself.
1995
self.search_specific_file_parents.add(
1996
self.pathjoin(*entry[0][:2]))
1997
if changed or self.include_unchanged:
1998
results.append((result, changed))
1999
self.searched_exact_paths.add(path_utf8)
2002
cdef object _path_info(self, utf8_path, unicode_path):
2003
"""Generate path_info for unicode_path.
2005
:return: None if unicode_path does not exist, or a path_info tuple.
2007
abspath = self.tree.abspath(unicode_path)
2009
stat = os.lstat(abspath)
2011
if e.errno == errno.ENOENT:
2012
# the path does not exist.
2016
utf8_basename = utf8_path.rsplit('/', 1)[-1]
2017
dir_info = (utf8_path, utf8_basename,
2018
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
2020
if dir_info[2] == 'directory':
2021
if self.tree._directory_is_tree_reference(
2023
self.root_dir_info = self.root_dir_info[:2] + \
2024
('tree-reference',) + self.root_dir_info[3:]