294
288
if not PyString_CheckExact(path2):
295
289
raise TypeError("'path2' must be a plain string, not %s: %r"
296
290
% (type(path2), path2))
297
return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
298
PyString_Size(path1),
299
PyString_AsString(path2),
300
PyString_Size(path2))
303
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
304
char *path2, int path2_len): # cannot_raise
291
return _cmp_path_by_dirblock(PyString_AsString(path1),
292
PyString_Size(path1),
293
PyString_AsString(path2),
294
PyString_Size(path2))
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
char *path2, int path2_len):
305
299
"""Compare two paths by what directory they are in.
307
see ``_cmp_path_by_dirblock`` for details.
301
see ``_cmp_path_by_dirblock_c`` for details.
309
303
cdef char *dirname1
310
304
cdef int dirname1_len
657
648
# Build up the key that will be used.
658
649
# By using <object>(void *) Pyrex will automatically handle the
659
650
# Py_INCREF that we need.
660
cur_dirname = <object>p_current_dirname[0]
661
# Use StaticTuple_New to pre-allocate, rather than creating a regular
662
# tuple and passing it to the StaticTuple constructor.
663
# path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
664
# self.get_next_str(),
665
# self.get_next_str(),
667
tmp = StaticTuple_New(3)
668
Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
669
cur_basename = self.get_next_str()
670
cur_file_id = self.get_next_str()
671
Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
672
Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
673
path_name_file_id_key = tmp
651
path_name_file_id_key = (<object>p_current_dirname[0],
675
656
# Parse all of the per-tree information. current has the information in
676
657
# the same location as parent trees. The only difference is that 'info'
694
675
executable_cstr = self.get_next(&cur_size)
695
676
is_executable = (executable_cstr[0] == c'y')
696
677
info = self.get_next_str()
697
# TODO: If we want to use StaticTuple_New here we need to be pretty
698
# careful. We are relying on a bit of Pyrex
699
# automatic-conversion from 'int' to PyInt, and that doesn't
700
# play well with the StaticTuple_SET_ITEM macro.
701
# Timing doesn't (yet) show a worthwhile improvement in speed
702
# versus complexity and maintainability.
703
# tmp = StaticTuple_New(5)
704
# Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
705
# Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
706
# Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
707
# Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
708
# Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
709
# PyList_Append(trees, tmp)
710
PyList_Append(trees, StaticTuple(
678
PyList_Append(trees, (
711
679
minikind, # minikind
712
680
fingerprint, # fingerprint
713
681
entry_size, # size
821
790
cdef char result[6*4] # 6 long ints
822
791
cdef int *aliased
823
792
aliased = <int *>result
824
aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size))
825
# mtime and ctime will often be floats but get converted to PyInt within
826
aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime))
827
aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime))
828
aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev))
829
aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino))
830
aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode))
793
aliased[0] = htonl(stat_value.st_size)
794
aliased[1] = htonl(int(stat_value.st_mtime))
795
aliased[2] = htonl(int(stat_value.st_ctime))
796
aliased[3] = htonl(stat_value.st_dev)
797
aliased[4] = htonl(stat_value.st_ino & 0xFFFFFFFF)
798
aliased[5] = htonl(stat_value.st_mode)
831
799
packed = PyString_FromStringAndSize(result, 6*4)
832
800
return _encode(packed)[:-1]
835
def pack_stat(stat_value):
836
"""Convert stat value into a packed representation quickly with pyrex"""
837
return _pack_stat(stat_value)
840
803
def update_entry(self, entry, abspath, stat_value):
841
804
"""Update the entry based on what is actually on disk.
924
885
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
925
886
executable, packed_stat)
927
# This file is not worth caching the sha1. Either it is too new, or
928
# it is newly added. Regardless, the only things we are changing
929
# are derived from the stat, and so are not worth caching. So we do
930
# *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
931
# updated values if there is *other* data worth saving.)
932
entry[1][0] = ('f', '', stat_value.st_size, executable,
888
entry[1][0] = ('f', '', stat_value.st_size,
889
executable, DirState.NULLSTAT)
935
890
elif minikind == c'd':
936
892
entry[1][0] = ('d', '', 0, False, packed_stat)
937
893
if saved_minikind != c'd':
938
894
# This changed from something into a directory. Make sure we
942
898
self._get_block_entry_index(entry[0][0], entry[0][1], 0)
943
899
self._ensure_block(block_index, entry_index,
944
900
pathjoin(entry[0][0], entry[0][1]))
946
# Any changes are derived trivially from the stat object, not worth
947
# re-writing a dirstate for just this
949
901
elif minikind == c'l':
950
if saved_minikind == c'l':
951
# If the object hasn't changed kind, it isn't worth saving the
952
# dirstate just for a symlink. The default is 'fast symlinks' which
953
# save the target in the inode entry, rather than separately. So to
954
# stat, we've already read everything off disk.
956
902
link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
957
903
if self._cutoff_time is None:
958
904
self._sha_cutoff_time()
1020
962
cdef class ProcessEntryC:
1022
cdef int doing_consistency_expansion
1023
964
cdef object old_dirname_to_file_id # dict
1024
965
cdef object new_dirname_to_file_id # dict
966
cdef readonly object uninteresting
1025
967
cdef object last_source_parent
1026
968
cdef object last_target_parent
1027
cdef int include_unchanged
969
cdef object include_unchanged
1029
970
cdef object use_filesystem_for_exec
1030
971
cdef object utf8_decode
1031
972
cdef readonly object searched_specific_files
1032
cdef readonly object searched_exact_paths
1033
973
cdef object search_specific_files
1034
# The parents up to the root of the paths we are searching.
1035
# After all normal paths are returned, these specific items are returned.
1036
cdef object search_specific_file_parents
1037
974
cdef object state
1038
975
# Current iteration variables:
1039
976
cdef object current_root
1051
988
cdef object current_block_list
1052
989
cdef object current_dir_info
1053
990
cdef object current_dir_list
1054
cdef object _pending_consistent_entries # list
1055
991
cdef int path_index
1056
992
cdef object root_dir_info
1057
993
cdef object bisect_left
1058
994
cdef object pathjoin
1059
995
cdef object fstat
1060
# A set of the ids we've output when doing partial output.
1061
cdef object seen_ids
1062
996
cdef object sha_file
1064
998
def __init__(self, include_unchanged, use_filesystem_for_exec,
1065
999
search_specific_files, state, source_index, target_index,
1066
1000
want_unversioned, tree):
1067
self.doing_consistency_expansion = 0
1068
1001
self.old_dirname_to_file_id = {}
1069
1002
self.new_dirname_to_file_id = {}
1070
# Are we doing a partial iter_changes?
1071
self.partial = set(['']).__ne__(search_specific_files)
1003
# Just a sentinel, so that _process_entry can say that this
1004
# record is handled, but isn't interesting to process (unchanged)
1005
self.uninteresting = object()
1072
1006
# Using a list so that we can access the values and change them in
1073
1007
# nested scope. Each one is [path, file_id, entry]
1074
1008
self.last_source_parent = [None, None]
1075
1009
self.last_target_parent = [None, None]
1076
if include_unchanged is None:
1077
self.include_unchanged = False
1079
self.include_unchanged = int(include_unchanged)
1010
self.include_unchanged = include_unchanged
1080
1011
self.use_filesystem_for_exec = use_filesystem_for_exec
1081
1012
self.utf8_decode = cache_utf8._utf8_decode
1082
1013
# for all search_indexs in each path at or under each element of
1083
# search_specific_files, if the detail is relocated: add the id, and
1084
# add the relocated path as one to search if it's not searched already.
1085
# If the detail is not relocated, add the id.
1014
# search_specific_files, if the detail is relocated: add the id, and add the
1015
# relocated path as one to search if it's not searched already. If the
1016
# detail is not relocated, add the id.
1086
1017
self.searched_specific_files = set()
1087
# When we search exact paths without expanding downwards, we record
1089
self.searched_exact_paths = set()
1090
1018
self.search_specific_files = search_specific_files
1091
# The parents up to the root of the paths we are searching.
1092
# After all normal paths are returned, these specific items are returned.
1093
self.search_specific_file_parents = set()
1094
# The ids we've sent out in the delta.
1095
self.seen_ids = set()
1096
1019
self.state = state
1097
1020
self.current_root = None
1098
1021
self.current_root_unicode = None
1114
1037
self.current_block_pos = -1
1115
1038
self.current_dir_info = None
1116
1039
self.current_dir_list = None
1117
self._pending_consistent_entries = []
1118
1040
self.path_index = 0
1119
1041
self.root_dir_info = None
1120
1042
self.bisect_left = bisect.bisect_left
1121
1043
self.pathjoin = osutils.pathjoin
1122
1044
self.fstat = os.fstat
1123
1045
self.sha_file = osutils.sha_file
1124
if target_index != 0:
1125
# A lot of code in here depends on target_index == 0
1126
raise errors.BzrError('unsupported target index')
1128
1047
cdef _process_entry(self, entry, path_info):
1129
1048
"""Compare an entry and real disk to generate delta information.
1131
1050
:param path_info: top_relpath, basename, kind, lstat, abspath for
1132
the path of entry. If None, then the path is considered absent in
1133
the target (Perhaps we should pass in a concrete entry for this ?)
1051
the path of entry. If None, then the path is considered absent.
1052
(Perhaps we should pass in a concrete entry for this ?)
1134
1053
Basename is returned as a utf8 string because we expect this
1135
1054
tuple will be ignored, and don't want to take the time to
1137
:return: (iter_changes_result, changed). If the entry has not been
1138
handled then changed is None. Otherwise it is False if no content
1139
or metadata changes have occurred, and True if any content or
1140
metadata change has occurred. If self.include_unchanged is True then
1141
if changed is not None, iter_changes_result will always be a result
1142
tuple. Otherwise, iter_changes_result is None unless changed is
1056
:return: None if these don't match
1057
A tuple of information about the change, or
1058
the object 'uninteresting' if these match, but are
1059
basically identical.
1145
1061
cdef char target_minikind
1146
1062
cdef char source_minikind
1227
1140
if source_minikind != c'f':
1228
1141
content_change = 1
1230
# Check the sha. We can't just rely on the size as
1231
# content filtering may mean different sizes actually
1232
# map to the same content
1233
if link_or_sha1 is None:
1235
statvalue, link_or_sha1 = \
1236
self.state._sha1_provider.stat_and_sha1(
1238
self.state._observed_sha1(entry, link_or_sha1,
1240
content_change = (link_or_sha1 != source_details[1])
1143
# If the size is the same, check the sha:
1144
if target_details[2] == source_details[2]:
1145
if link_or_sha1 is None:
1147
statvalue, link_or_sha1 = \
1148
self.state._sha1_provider.stat_and_sha1(
1150
self.state._observed_sha1(entry, link_or_sha1,
1152
content_change = (link_or_sha1 != source_details[1])
1154
# Size changed, so must be different
1241
1156
# Target details is updated at update_entry time
1242
1157
if self.use_filesystem_for_exec:
1243
1158
# We don't need S_ISREG here, because we are sure
1397
1308
(parent_id, None),
1398
1309
(self.utf8_decode(entry[0][1])[0], None),
1399
1310
(_minikind_to_kind(source_minikind), None),
1400
(source_details[3], None)), True
1311
(source_details[3], None))
1401
1312
elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1402
1313
# a rename; could be a true rename, or a rename inherited from
1403
1314
# a renamed parent. TODO: handle this efficiently. It's not
1404
1315
# common case to rename dirs though, so a correct but slow
1405
1316
# implementation will do.
1406
if (not self.doing_consistency_expansion and
1407
not osutils.is_inside_any(self.searched_specific_files,
1408
target_details[1])):
1317
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1409
1318
self.search_specific_files.add(target_details[1])
1410
# We don't expand the specific files parents list here as
1411
# the path is absent in target and won't create a delta with
1413
1319
elif ((source_minikind == c'r' or source_minikind == c'a') and
1414
1320
(target_minikind == c'r' or target_minikind == c'a')):
1415
1321
# neither of the selected trees contain this path,
1429
1335
def iter_changes(self):
1432
cdef int _gather_result_for_consistency(self, result) except -1:
1433
"""Check a result we will yield to make sure we are consistent later.
1435
This gathers result's parents into a set to output later.
1437
:param result: A result tuple.
1439
if not self.partial or not result[0]:
1441
self.seen_ids.add(result[0])
1442
new_path = result[1][1]
1444
# Not the root and not a delete: queue up the parents of the path.
1445
self.search_specific_file_parents.update(
1446
osutils.parent_directories(new_path.encode('utf8')))
1447
# Add the root directory which parent_directories does not
1449
self.search_specific_file_parents.add('')
1452
cdef int _update_current_block(self) except -1:
1338
cdef void _update_current_block(self):
1453
1339
if (self.block_index < len(self.state._dirblocks) and
1454
1340
osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1455
1341
self.current_block = self.state._dirblocks[self.block_index]
1516
1401
cdef char * current_dirname_c, * current_blockname_c
1517
1402
cdef int advance_entry, advance_path
1518
1403
cdef int path_handled
1404
uninteresting = self.uninteresting
1519
1405
searched_specific_files = self.searched_specific_files
1520
1406
# Are we walking a root?
1521
1407
while self.root_entries_pos < self.root_entries_len:
1522
1408
entry = self.root_entries[self.root_entries_pos]
1523
1409
self.root_entries_pos = self.root_entries_pos + 1
1524
result, changed = self._process_entry(entry, self.root_dir_info)
1525
if changed is not None:
1527
self._gather_result_for_consistency(result)
1528
if changed or self.include_unchanged:
1410
result = self._process_entry(entry, self.root_dir_info)
1411
if result is not None and result is not self.uninteresting:
1530
1413
# Have we finished the prior root, or never started one ?
1531
1414
if self.current_root is None:
1532
1415
# TODO: the pending list should be lexically sorted? the
1533
1416
# interface doesn't require it.
1535
1418
self.current_root = self.search_specific_files.pop()
1537
1420
raise StopIteration()
1421
self.current_root_unicode = self.current_root.decode('utf8')
1538
1422
self.searched_specific_files.add(self.current_root)
1539
1423
# process the entries for this containing directory: the rest will be
1540
1424
# found by their parents recursively.
1541
1425
self.root_entries = self.state._entries_for_path(self.current_root)
1542
1426
self.root_entries_len = len(self.root_entries)
1543
self.current_root_unicode = self.current_root.decode('utf8')
1544
1427
self.root_abspath = self.tree.abspath(self.current_root_unicode)
1546
1429
root_stat = os.lstat(self.root_abspath)
1908
1775
self.current_dir_info = self.dir_iterator.next()
1909
1776
self.current_dir_list = self.current_dir_info[1]
1910
except StopIteration, _:
1777
except StopIteration:
1911
1778
self.current_dir_info = None
1913
cdef object _next_consistent_entries(self):
1914
"""Grabs the next specific file parent case to consider.
1916
:return: A list of the results, each of which is as for _process_entry.
1919
while self.search_specific_file_parents:
1920
# Process the parent directories for the paths we were iterating.
1921
# Even in extremely large trees this should be modest, so currently
1922
# no attempt is made to optimise.
1923
path_utf8 = self.search_specific_file_parents.pop()
1924
if path_utf8 in self.searched_exact_paths:
1925
# We've examined this path.
1927
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1928
# We've examined this path.
1930
path_entries = self.state._entries_for_path(path_utf8)
1931
# We need either one or two entries. If the path in
1932
# self.target_index has moved (so the entry in source_index is in
1933
# 'ar') then we need to also look for the entry for this path in
1934
# self.source_index, to output the appropriate delete-or-rename.
1935
selected_entries = []
1937
for candidate_entry in path_entries:
1938
# Find entries present in target at this path:
1939
if candidate_entry[1][self.target_index][0] not in 'ar':
1941
selected_entries.append(candidate_entry)
1942
# Find entries present in source at this path:
1943
elif (self.source_index is not None and
1944
candidate_entry[1][self.source_index][0] not in 'ar'):
1946
if candidate_entry[1][self.target_index][0] == 'a':
1947
# Deleted, emit it here.
1948
selected_entries.append(candidate_entry)
1950
# renamed, emit it when we process the directory it
1952
self.search_specific_file_parents.add(
1953
candidate_entry[1][self.target_index][1])
1955
raise AssertionError(
1956
"Missing entry for specific path parent %r, %r" % (
1957
path_utf8, path_entries))
1958
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1959
for entry in selected_entries:
1960
if entry[0][2] in self.seen_ids:
1962
result, changed = self._process_entry(entry, path_info)
1964
raise AssertionError(
1965
"Got entry<->path mismatch for specific path "
1966
"%r entry %r path_info %r " % (
1967
path_utf8, entry, path_info))
1968
# Only include changes - we're outside the user's requested
1971
self._gather_result_for_consistency(result)
1972
if (result[6][0] == 'directory' and
1973
result[6][1] != 'directory'):
1974
# This stopped being a directory, the old children have
1976
if entry[1][self.source_index][0] == 'r':
1977
# renamed, take the source path
1978
entry_path_utf8 = entry[1][self.source_index][1]
1980
entry_path_utf8 = path_utf8
1981
initial_key = (entry_path_utf8, '', '')
1982
block_index, _ = self.state._find_block_index_from_key(
1984
if block_index == 0:
1985
# The children of the root are in block index 1.
1986
block_index = block_index + 1
1987
current_block = None
1988
if block_index < len(self.state._dirblocks):
1989
current_block = self.state._dirblocks[block_index]
1990
if not osutils.is_inside(
1991
entry_path_utf8, current_block[0]):
1992
# No entries for this directory at all.
1993
current_block = None
1994
if current_block is not None:
1995
for entry in current_block[1]:
1996
if entry[1][self.source_index][0] in 'ar':
1997
# Not in the source tree, so doesn't have to be
2000
# Path of the entry itself.
2001
self.search_specific_file_parents.add(
2002
self.pathjoin(*entry[0][:2]))
2003
if changed or self.include_unchanged:
2004
results.append((result, changed))
2005
self.searched_exact_paths.add(path_utf8)
2008
cdef object _path_info(self, utf8_path, unicode_path):
2009
"""Generate path_info for unicode_path.
2011
:return: None if unicode_path does not exist, or a path_info tuple.
2013
abspath = self.tree.abspath(unicode_path)
2015
stat = os.lstat(abspath)
2017
if e.errno == errno.ENOENT:
2018
# the path does not exist.
2022
utf8_basename = utf8_path.rsplit('/', 1)[-1]
2023
dir_info = (utf8_path, utf8_basename,
2024
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
2026
if dir_info[2] == 'directory':
2027
if self.tree._directory_is_tree_reference(
2029
self.root_dir_info = self.root_dir_info[:2] + \
2030
('tree-reference',) + self.root_dir_info[3:]