288
293
if not PyString_CheckExact(path2):
289
294
raise TypeError("'path2' must be a plain string, not %s: %r"
290
295
% (type(path2), path2))
291
return _cmp_path_by_dirblock(PyString_AsString(path1),
292
PyString_Size(path1),
293
PyString_AsString(path2),
294
PyString_Size(path2))
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
char *path2, int path2_len):
296
return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
297
PyString_Size(path1),
298
PyString_AsString(path2),
299
PyString_Size(path2))
302
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
303
char *path2, int path2_len): # cannot_raise
299
304
"""Compare two paths by what directory they are in.
301
see ``_cmp_path_by_dirblock_c`` for details.
306
see ``_cmp_path_by_dirblock`` for details.
303
308
cdef char *dirname1
304
309
cdef int dirname1_len
648
656
# Build up the key that will be used.
649
657
# By using <object>(void *) Pyrex will automatically handle the
650
658
# Py_INCREF that we need.
651
path_name_file_id_key = (<object>p_current_dirname[0],
659
cur_dirname = <object>p_current_dirname[0]
660
# Use StaticTuple_New to pre-allocate, rather than creating a regular
661
# tuple and passing it to the StaticTuple constructor.
662
# path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
663
# self.get_next_str(),
664
# self.get_next_str(),
666
tmp = StaticTuple_New(3)
667
Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
668
cur_basename = self.get_next_str()
669
cur_file_id = self.get_next_str()
670
Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
671
Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
672
path_name_file_id_key = tmp
656
674
# Parse all of the per-tree information. current has the information in
657
675
# the same location as parent trees. The only difference is that 'info'
675
693
executable_cstr = self.get_next(&cur_size)
676
694
is_executable = (executable_cstr[0] == c'y')
677
695
info = self.get_next_str()
678
PyList_Append(trees, (
696
# TODO: If we want to use StaticTuple_New here we need to be pretty
697
# careful. We are relying on a bit of Pyrex
698
# automatic-conversion from 'int' to PyInt, and that doesn't
699
# play well with the StaticTuple_SET_ITEM macro.
700
# Timing doesn't (yet) show a worthwhile improvement in speed
701
# versus complexity and maintainability.
702
# tmp = StaticTuple_New(5)
703
# Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
704
# Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
705
# Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
706
# Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
707
# Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
708
# PyList_Append(trees, tmp)
709
PyList_Append(trees, StaticTuple(
679
710
minikind, # minikind
680
711
fingerprint, # fingerprint
681
712
entry_size, # size
960
994
cdef class ProcessEntryC:
996
cdef int doing_consistency_expansion
962
997
cdef object old_dirname_to_file_id # dict
963
998
cdef object new_dirname_to_file_id # dict
964
cdef readonly object uninteresting
965
999
cdef object last_source_parent
966
1000
cdef object last_target_parent
967
cdef object include_unchanged
1001
cdef int include_unchanged
968
1003
cdef object use_filesystem_for_exec
969
1004
cdef object utf8_decode
970
1005
cdef readonly object searched_specific_files
1006
cdef readonly object searched_exact_paths
971
1007
cdef object search_specific_files
1008
# The parents up to the root of the paths we are searching.
1009
# After all normal paths are returned, these specific items are returned.
1010
cdef object search_specific_file_parents
972
1011
cdef object state
973
1012
# Current iteration variables:
974
1013
cdef object current_root
986
1025
cdef object current_block_list
987
1026
cdef object current_dir_info
988
1027
cdef object current_dir_list
1028
cdef object _pending_consistent_entries # list
989
1029
cdef int path_index
990
1030
cdef object root_dir_info
991
1031
cdef object bisect_left
992
1032
cdef object pathjoin
993
1033
cdef object fstat
1034
# A set of the ids we've output when doing partial output.
1035
cdef object seen_ids
994
1036
cdef object sha_file
996
1038
def __init__(self, include_unchanged, use_filesystem_for_exec,
997
1039
search_specific_files, state, source_index, target_index,
998
1040
want_unversioned, tree):
1041
self.doing_consistency_expansion = 0
999
1042
self.old_dirname_to_file_id = {}
1000
1043
self.new_dirname_to_file_id = {}
1001
# Just a sentry, so that _process_entry can say that this
1002
# record is handled, but isn't interesting to process (unchanged)
1003
self.uninteresting = object()
1044
# Are we doing a partial iter_changes?
1045
self.partial = set(['']).__ne__(search_specific_files)
1004
1046
# Using a list so that we can access the values and change them in
1005
1047
# nested scope. Each one is [path, file_id, entry]
1006
1048
self.last_source_parent = [None, None]
1007
1049
self.last_target_parent = [None, None]
1008
self.include_unchanged = include_unchanged
1050
if include_unchanged is None:
1051
self.include_unchanged = False
1053
self.include_unchanged = int(include_unchanged)
1009
1054
self.use_filesystem_for_exec = use_filesystem_for_exec
1010
1055
self.utf8_decode = cache_utf8._utf8_decode
1011
1056
# for all search_indexs in each path at or under each element of
1012
# search_specific_files, if the detail is relocated: add the id, and add the
1013
# relocated path as one to search if it's not searched already. If the
1014
# detail is not relocated, add the id.
1057
# search_specific_files, if the detail is relocated: add the id, and
1058
# add the relocated path as one to search if it's not searched already.
1059
# If the detail is not relocated, add the id.
1015
1060
self.searched_specific_files = set()
1061
# When we search exact paths without expanding downwards, we record
1063
self.searched_exact_paths = set()
1016
1064
self.search_specific_files = search_specific_files
1065
# The parents up to the root of the paths we are searching.
1066
# After all normal paths are returned, these specific items are returned.
1067
self.search_specific_file_parents = set()
1068
# The ids we've sent out in the delta.
1069
self.seen_ids = set()
1017
1070
self.state = state
1018
1071
self.current_root = None
1019
1072
self.current_root_unicode = None
1035
1088
self.current_block_pos = -1
1036
1089
self.current_dir_info = None
1037
1090
self.current_dir_list = None
1091
self._pending_consistent_entries = []
1038
1092
self.path_index = 0
1039
1093
self.root_dir_info = None
1040
1094
self.bisect_left = bisect.bisect_left
1041
1095
self.pathjoin = osutils.pathjoin
1042
1096
self.fstat = os.fstat
1043
1097
self.sha_file = osutils.sha_file
1098
if target_index != 0:
1099
# A lot of code in here depends on target_index == 0
1100
raise errors.BzrError('unsupported target index')
1045
1102
cdef _process_entry(self, entry, path_info):
1046
1103
"""Compare an entry and real disk to generate delta information.
1048
1105
:param path_info: top_relpath, basename, kind, lstat, abspath for
1049
the path of entry. If None, then the path is considered absent.
1050
(Perhaps we should pass in a concrete entry for this ?)
1106
the path of entry. If None, then the path is considered absent in
1107
the target (Perhaps we should pass in a concrete entry for this ?)
1051
1108
Basename is returned as a utf8 string because we expect this
1052
1109
tuple will be ignored, and don't want to take the time to
1054
:return: None if these don't match
1055
A tuple of information about the change, or
1056
the object 'uninteresting' if these match, but are
1057
basically identical.
1111
:return: (iter_changes_result, changed). If the entry has not been
1112
handled then changed is None. Otherwise it is False if no content
1113
or metadata changes have occurred, and True if any content or
1114
metadata change has occurred. If self.include_unchanged is True then
1115
if changed is not None, iter_changes_result will always be a result
1116
tuple. Otherwise, iter_changes_result is None unless changed is
1059
1119
cdef char target_minikind
1060
1120
cdef char source_minikind
1138
1201
if source_minikind != c'f':
1139
1202
content_change = 1
1141
# If the size is the same, check the sha:
1142
if target_details[2] == source_details[2]:
1143
if link_or_sha1 is None:
1145
file_obj = file(path_info[4], 'rb')
1147
# XXX: TODO: Use lower level file IO rather
1148
# than python objects for sha-misses.
1149
statvalue = self.fstat(file_obj.fileno())
1150
link_or_sha1 = self.sha_file(file_obj)
1153
self.state._observed_sha1(entry, link_or_sha1,
1155
content_change = (link_or_sha1 != source_details[1])
1157
# Size changed, so must be different
1204
# Check the sha. We can't just rely on the size as
1205
# content filtering may mean different sizes actually
1206
# map to the same content
1207
if link_or_sha1 is None:
1209
statvalue, link_or_sha1 = \
1210
self.state._sha1_provider.stat_and_sha1(
1212
self.state._observed_sha1(entry, link_or_sha1,
1214
content_change = (link_or_sha1 != source_details[1])
1159
1215
# Target details is updated at update_entry time
1160
1216
if self.use_filesystem_for_exec:
1161
1217
# We don't need S_ISREG here, because we are sure
1311
1371
(parent_id, None),
1312
1372
(self.utf8_decode(entry[0][1])[0], None),
1313
1373
(_minikind_to_kind(source_minikind), None),
1314
(source_details[3], None))
1374
(source_details[3], None)), True
1315
1375
elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1316
1376
# a rename; could be a true rename, or a rename inherited from
1317
1377
# a renamed parent. TODO: handle this efficiently. It's not a
1318
1378
# common case to rename dirs though, so a correct but slow
1319
1379
# implementation will do.
1320
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1380
if (not self.doing_consistency_expansion and
1381
not osutils.is_inside_any(self.searched_specific_files,
1382
target_details[1])):
1321
1383
self.search_specific_files.add(target_details[1])
1384
# We don't expand the specific files parents list here as
1385
# the path is absent in target and won't create a delta with
1322
1387
elif ((source_minikind == c'r' or source_minikind == c'a') and
1323
1388
(target_minikind == c'r' or target_minikind == c'a')):
1324
1389
# neither of the selected trees contain this path,
1338
1403
def iter_changes(self):
1341
cdef void _update_current_block(self):
1406
cdef int _gather_result_for_consistency(self, result) except -1:
1407
"""Check a result we will yield to make sure we are consistent later.
1409
This gathers result's parents into a set to output later.
1411
:param result: A result tuple.
1413
if not self.partial or not result[0]:
1415
self.seen_ids.add(result[0])
1416
new_path = result[1][1]
1418
# Not the root and not a delete: queue up the parents of the path.
1419
self.search_specific_file_parents.update(
1420
osutils.parent_directories(new_path.encode('utf8')))
1421
# Add the root directory which parent_directories does not
1423
self.search_specific_file_parents.add('')
1426
cdef int _update_current_block(self) except -1:
1342
1427
if (self.block_index < len(self.state._dirblocks) and
1343
1428
osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1344
1429
self.current_block = self.state._dirblocks[self.block_index]
1404
1490
cdef char * current_dirname_c, * current_blockname_c
1405
1491
cdef int advance_entry, advance_path
1406
1492
cdef int path_handled
1407
uninteresting = self.uninteresting
1408
1493
searched_specific_files = self.searched_specific_files
1409
1494
# Are we walking a root?
1410
1495
while self.root_entries_pos < self.root_entries_len:
1411
1496
entry = self.root_entries[self.root_entries_pos]
1412
1497
self.root_entries_pos = self.root_entries_pos + 1
1413
result = self._process_entry(entry, self.root_dir_info)
1414
if result is not None and result is not self.uninteresting:
1498
result, changed = self._process_entry(entry, self.root_dir_info)
1499
if changed is not None:
1501
self._gather_result_for_consistency(result)
1502
if changed or self.include_unchanged:
1416
1504
# Have we finished the prior root, or never started one ?
1417
1505
if self.current_root is None:
1418
1506
# TODO: the pending list should be lexically sorted? the
1419
1507
# interface doesn't require it.
1421
1509
self.current_root = self.search_specific_files.pop()
1423
1511
raise StopIteration()
1424
self.current_root_unicode = self.current_root.decode('utf8')
1425
1512
self.searched_specific_files.add(self.current_root)
1426
1513
# process the entries for this containing directory: the rest will be
1427
1514
# found by their parents recursively.
1428
1515
self.root_entries = self.state._entries_for_path(self.current_root)
1429
1516
self.root_entries_len = len(self.root_entries)
1517
self.current_root_unicode = self.current_root.decode('utf8')
1430
1518
self.root_abspath = self.tree.abspath(self.current_root_unicode)
1432
1520
root_stat = os.lstat(self.root_abspath)
1778
1881
self.current_dir_info = self.dir_iterator.next()
1779
1882
self.current_dir_list = self.current_dir_info[1]
1780
except StopIteration:
1883
except StopIteration, _:
1781
1884
self.current_dir_info = None
1886
cdef object _next_consistent_entries(self):
1887
"""Grabs the next specific file parent case to consider.
1889
:return: A list of the results, each of which is as for _process_entry.
1892
while self.search_specific_file_parents:
1893
# Process the parent directories for the paths we were iterating.
1894
# Even in extremely large trees this should be modest, so currently
1895
# no attempt is made to optimise.
1896
path_utf8 = self.search_specific_file_parents.pop()
1897
if path_utf8 in self.searched_exact_paths:
1898
# We've examined this path.
1900
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1901
# We've examined this path.
1903
path_entries = self.state._entries_for_path(path_utf8)
1904
# We need either one or two entries. If the path in
1905
# self.target_index has moved (so the entry in source_index is in
1906
# 'ar') then we need to also look for the entry for this path in
1907
# self.source_index, to output the appropriate delete-or-rename.
1908
selected_entries = []
1910
for candidate_entry in path_entries:
1911
# Find entries present in target at this path:
1912
if candidate_entry[1][self.target_index][0] not in 'ar':
1914
selected_entries.append(candidate_entry)
1915
# Find entries present in source at this path:
1916
elif (self.source_index is not None and
1917
candidate_entry[1][self.source_index][0] not in 'ar'):
1919
if candidate_entry[1][self.target_index][0] == 'a':
1920
# Deleted, emit it here.
1921
selected_entries.append(candidate_entry)
1923
# renamed, emit it when we process the directory it
1925
self.search_specific_file_parents.add(
1926
candidate_entry[1][self.target_index][1])
1928
raise AssertionError(
1929
"Missing entry for specific path parent %r, %r" % (
1930
path_utf8, path_entries))
1931
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1932
for entry in selected_entries:
1933
if entry[0][2] in self.seen_ids:
1935
result, changed = self._process_entry(entry, path_info)
1937
raise AssertionError(
1938
"Got entry<->path mismatch for specific path "
1939
"%r entry %r path_info %r " % (
1940
path_utf8, entry, path_info))
1941
# Only include changes - we're outside the user's requested
1944
self._gather_result_for_consistency(result)
1945
if (result[6][0] == 'directory' and
1946
result[6][1] != 'directory'):
1947
# This stopped being a directory, the old children have
1949
if entry[1][self.source_index][0] == 'r':
1950
# renamed, take the source path
1951
entry_path_utf8 = entry[1][self.source_index][1]
1953
entry_path_utf8 = path_utf8
1954
initial_key = (entry_path_utf8, '', '')
1955
block_index, _ = self.state._find_block_index_from_key(
1957
if block_index == 0:
1958
# The children of the root are in block index 1.
1959
block_index = block_index + 1
1960
current_block = None
1961
if block_index < len(self.state._dirblocks):
1962
current_block = self.state._dirblocks[block_index]
1963
if not osutils.is_inside(
1964
entry_path_utf8, current_block[0]):
1965
# No entries for this directory at all.
1966
current_block = None
1967
if current_block is not None:
1968
for entry in current_block[1]:
1969
if entry[1][self.source_index][0] in 'ar':
1970
# Not in the source tree, so doesn't have to be
1973
# Path of the entry itself.
1974
self.search_specific_file_parents.add(
1975
self.pathjoin(*entry[0][:2]))
1976
if changed or self.include_unchanged:
1977
results.append((result, changed))
1978
self.searched_exact_paths.add(path_utf8)
1981
cdef object _path_info(self, utf8_path, unicode_path):
1982
"""Generate path_info for unicode_path.
1984
:return: None if unicode_path does not exist, or a path_info tuple.
1986
abspath = self.tree.abspath(unicode_path)
1988
stat = os.lstat(abspath)
1990
if e.errno == errno.ENOENT:
1991
# the path does not exist.
1995
utf8_basename = utf8_path.rsplit('/', 1)[-1]
1996
dir_info = (utf8_path, utf8_basename,
1997
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
1999
if dir_info[2] == 'directory':
2000
if self.tree._directory_is_tree_reference(
2002
self.root_dir_info = self.root_dir_info[:2] + \
2003
('tree-reference',) + self.root_dir_info[3:]