293
288
if not PyString_CheckExact(path2):
294
289
raise TypeError("'path2' must be a plain string, not %s: %r"
295
290
% (type(path2), path2))
296
return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
297
PyString_Size(path1),
298
PyString_AsString(path2),
299
PyString_Size(path2))
302
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
303
char *path2, int path2_len): # cannot_raise
291
return _cmp_path_by_dirblock(PyString_AsString(path1),
292
PyString_Size(path1),
293
PyString_AsString(path2),
294
PyString_Size(path2))
297
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
298
char *path2, int path2_len):
304
299
"""Compare two paths by what directory they are in.
306
see ``_cmp_path_by_dirblock`` for details.
301
see ``_cmp_path_by_dirblock_c`` for details.
308
303
cdef char *dirname1
309
304
cdef int dirname1_len
656
648
# Build up the key that will be used.
657
649
# By using <object>(void *) Pyrex will automatically handle the
658
650
# Py_INCREF that we need.
659
cur_dirname = <object>p_current_dirname[0]
660
# Use StaticTuple_New to pre-allocate, rather than creating a regular
661
# tuple and passing it to the StaticTuple constructor.
662
# path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
663
# self.get_next_str(),
664
# self.get_next_str(),
666
tmp = StaticTuple_New(3)
667
Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
668
cur_basename = self.get_next_str()
669
cur_file_id = self.get_next_str()
670
Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
671
Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
672
path_name_file_id_key = tmp
651
path_name_file_id_key = (<object>p_current_dirname[0],
674
656
# Parse all of the per-tree information. current has the information in
675
657
# the same location as parent trees. The only difference is that 'info'
693
675
executable_cstr = self.get_next(&cur_size)
694
676
is_executable = (executable_cstr[0] == c'y')
695
677
info = self.get_next_str()
696
# TODO: If we want to use StaticTuple_New here we need to be pretty
697
# careful. We are relying on a bit of Pyrex
698
# automatic-conversion from 'int' to PyInt, and that doesn't
699
# play well with the StaticTuple_SET_ITEM macro.
700
# Timing doesn't (yet) show a worthwhile improvement in speed
701
# versus complexity and maintainability.
702
# tmp = StaticTuple_New(5)
703
# Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
704
# Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
705
# Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
706
# Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
707
# Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
708
# PyList_Append(trees, tmp)
709
PyList_Append(trees, StaticTuple(
678
PyList_Append(trees, (
710
679
minikind, # minikind
711
680
fingerprint, # fingerprint
712
681
entry_size, # size
994
962
cdef class ProcessEntryC:
996
cdef int doing_consistency_expansion
997
964
cdef object old_dirname_to_file_id # dict
998
965
cdef object new_dirname_to_file_id # dict
966
cdef readonly object uninteresting
999
967
cdef object last_source_parent
1000
968
cdef object last_target_parent
1001
cdef int include_unchanged
969
cdef object include_unchanged
1003
970
cdef object use_filesystem_for_exec
1004
971
cdef object utf8_decode
1005
972
cdef readonly object searched_specific_files
1006
cdef readonly object searched_exact_paths
1007
973
cdef object search_specific_files
1008
# The parents up to the root of the paths we are searching.
1009
# After all normal paths are returned, these specific items are returned.
1010
cdef object search_specific_file_parents
1011
974
cdef object state
1012
975
# Current iteration variables:
1013
976
cdef object current_root
1025
988
cdef object current_block_list
1026
989
cdef object current_dir_info
1027
990
cdef object current_dir_list
1028
cdef object _pending_consistent_entries # list
1029
991
cdef int path_index
1030
992
cdef object root_dir_info
1031
993
cdef object bisect_left
1032
994
cdef object pathjoin
1033
995
cdef object fstat
1034
# A set of the ids we've output when doing partial output.
1035
cdef object seen_ids
1036
996
cdef object sha_file
1038
998
def __init__(self, include_unchanged, use_filesystem_for_exec,
1039
999
search_specific_files, state, source_index, target_index,
1040
1000
want_unversioned, tree):
1041
self.doing_consistency_expansion = 0
1042
1001
self.old_dirname_to_file_id = {}
1043
1002
self.new_dirname_to_file_id = {}
1044
# Are we doing a partial iter_changes?
1045
self.partial = set(['']).__ne__(search_specific_files)
1003
# Just a sentry, so that _process_entry can say that this
1004
# record is handled, but isn't interesting to process (unchanged)
1005
self.uninteresting = object()
1046
1006
# Using a list so that we can access the values and change them in
1047
1007
# nested scope. Each one is [path, file_id, entry]
1048
1008
self.last_source_parent = [None, None]
1049
1009
self.last_target_parent = [None, None]
1050
if include_unchanged is None:
1051
self.include_unchanged = False
1053
self.include_unchanged = int(include_unchanged)
1010
self.include_unchanged = include_unchanged
1054
1011
self.use_filesystem_for_exec = use_filesystem_for_exec
1055
1012
self.utf8_decode = cache_utf8._utf8_decode
1056
1013
# for all search_indexs in each path at or under each element of
1057
# search_specific_files, if the detail is relocated: add the id, and
1058
# add the relocated path as one to search if it's not searched already.
1059
# If the detail is not relocated, add the id.
1014
# search_specific_files, if the detail is relocated: add the id, and add the
1015
# relocated path as one to search if it's not searched already. If the
1016
# detail is not relocated, add the id.
1060
1017
self.searched_specific_files = set()
1061
# When we search exact paths without expanding downwards, we record
1063
self.searched_exact_paths = set()
1064
1018
self.search_specific_files = search_specific_files
1065
# The parents up to the root of the paths we are searching.
1066
# After all normal paths are returned, these specific items are returned.
1067
self.search_specific_file_parents = set()
1068
# The ids we've sent out in the delta.
1069
self.seen_ids = set()
1070
1019
self.state = state
1071
1020
self.current_root = None
1072
1021
self.current_root_unicode = None
1088
1037
self.current_block_pos = -1
1089
1038
self.current_dir_info = None
1090
1039
self.current_dir_list = None
1091
self._pending_consistent_entries = []
1092
1040
self.path_index = 0
1093
1041
self.root_dir_info = None
1094
1042
self.bisect_left = bisect.bisect_left
1095
1043
self.pathjoin = osutils.pathjoin
1096
1044
self.fstat = os.fstat
1097
1045
self.sha_file = osutils.sha_file
1098
if target_index != 0:
1099
# A lot of code in here depends on target_index == 0
1100
raise errors.BzrError('unsupported target index')
1102
1047
cdef _process_entry(self, entry, path_info):
1103
1048
"""Compare an entry and real disk to generate delta information.
1105
1050
:param path_info: top_relpath, basename, kind, lstat, abspath for
1106
the path of entry. If None, then the path is considered absent in
1107
the target (Perhaps we should pass in a concrete entry for this ?)
1051
the path of entry. If None, then the path is considered absent.
1052
(Perhaps we should pass in a concrete entry for this ?)
1108
1053
Basename is returned as a utf8 string because we expect this
1109
1054
tuple will be ignored, and don't want to take the time to
1111
:return: (iter_changes_result, changed). If the entry has not been
1112
handled then changed is None. Otherwise it is False if no content
1113
or metadata changes have occurred, and True if any content or
1114
metadata change has occurred. If self.include_unchanged is True then
1115
if changed is not None, iter_changes_result will always be a result
1116
tuple. Otherwise, iter_changes_result is None unless changed is
1056
:return: None if these don't match
1057
A tuple of information about the change, or
1058
the object 'uninteresting' if these match, but are
1059
basically identical.
1119
1061
cdef char target_minikind
1120
1062
cdef char source_minikind
1201
1140
if source_minikind != c'f':
1202
1141
content_change = 1
1204
# Check the sha. We can't just rely on the size as
1205
# content filtering may mean different sizes actually
1206
# map to the same content
1207
if link_or_sha1 is None:
1209
statvalue, link_or_sha1 = \
1210
self.state._sha1_provider.stat_and_sha1(
1212
self.state._observed_sha1(entry, link_or_sha1,
1214
content_change = (link_or_sha1 != source_details[1])
1143
# If the size is the same, check the sha:
1144
if target_details[2] == source_details[2]:
1145
if link_or_sha1 is None:
1147
statvalue, link_or_sha1 = \
1148
self.state._sha1_provider.stat_and_sha1(
1150
self.state._observed_sha1(entry, link_or_sha1,
1152
content_change = (link_or_sha1 != source_details[1])
1154
# Size changed, so must be different
1215
1156
# Target details is updated at update_entry time
1216
1157
if self.use_filesystem_for_exec:
1217
1158
# We don't need S_ISREG here, because we are sure
1371
1308
(parent_id, None),
1372
1309
(self.utf8_decode(entry[0][1])[0], None),
1373
1310
(_minikind_to_kind(source_minikind), None),
1374
(source_details[3], None)), True
1311
(source_details[3], None))
1375
1312
elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1376
1313
# a rename; could be a true rename, or a rename inherited from
1377
1314
# a renamed parent. TODO: handle this efficiently. It's not
1378
1315
# common case to rename dirs though, so a correct but slow
1379
1316
# implementation will do.
1380
if (not self.doing_consistency_expansion and
1381
not osutils.is_inside_any(self.searched_specific_files,
1382
target_details[1])):
1317
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1383
1318
self.search_specific_files.add(target_details[1])
1384
# We don't expand the specific files parents list here as
1385
# the path is absent in target and won't create a delta with
1387
1319
elif ((source_minikind == c'r' or source_minikind == c'a') and
1388
1320
(target_minikind == c'r' or target_minikind == c'a')):
1389
1321
# neither of the selected trees contain this path,
1403
1335
def iter_changes(self):
1406
cdef int _gather_result_for_consistency(self, result) except -1:
1407
"""Check a result we will yield to make sure we are consistent later.
1409
This gathers result's parents into a set to output later.
1411
:param result: A result tuple.
1413
if not self.partial or not result[0]:
1415
self.seen_ids.add(result[0])
1416
new_path = result[1][1]
1418
# Not the root and not a delete: queue up the parents of the path.
1419
self.search_specific_file_parents.update(
1420
osutils.parent_directories(new_path.encode('utf8')))
1421
# Add the root directory which parent_directories does not
1423
self.search_specific_file_parents.add('')
1426
cdef int _update_current_block(self) except -1:
1338
cdef void _update_current_block(self):
1427
1339
if (self.block_index < len(self.state._dirblocks) and
1428
1340
osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1429
1341
self.current_block = self.state._dirblocks[self.block_index]
1490
1401
cdef char * current_dirname_c, * current_blockname_c
1491
1402
cdef int advance_entry, advance_path
1492
1403
cdef int path_handled
1404
uninteresting = self.uninteresting
1493
1405
searched_specific_files = self.searched_specific_files
1494
1406
# Are we walking a root?
1495
1407
while self.root_entries_pos < self.root_entries_len:
1496
1408
entry = self.root_entries[self.root_entries_pos]
1497
1409
self.root_entries_pos = self.root_entries_pos + 1
1498
result, changed = self._process_entry(entry, self.root_dir_info)
1499
if changed is not None:
1501
self._gather_result_for_consistency(result)
1502
if changed or self.include_unchanged:
1410
result = self._process_entry(entry, self.root_dir_info)
1411
if result is not None and result is not self.uninteresting:
1504
1413
# Have we finished the prior root, or never started one ?
1505
1414
if self.current_root is None:
1506
1415
# TODO: the pending list should be lexically sorted? the
1507
1416
# interface doesn't require it.
1509
1418
self.current_root = self.search_specific_files.pop()
1511
1420
raise StopIteration()
1421
self.current_root_unicode = self.current_root.decode('utf8')
1512
1422
self.searched_specific_files.add(self.current_root)
1513
1423
# process the entries for this containing directory: the rest will be
1514
1424
# found by their parents recursively.
1515
1425
self.root_entries = self.state._entries_for_path(self.current_root)
1516
1426
self.root_entries_len = len(self.root_entries)
1517
self.current_root_unicode = self.current_root.decode('utf8')
1518
1427
self.root_abspath = self.tree.abspath(self.current_root_unicode)
1520
1429
root_stat = os.lstat(self.root_abspath)
1881
1775
self.current_dir_info = self.dir_iterator.next()
1882
1776
self.current_dir_list = self.current_dir_info[1]
1883
except StopIteration, _:
1777
except StopIteration:
1884
1778
self.current_dir_info = None
1886
cdef object _next_consistent_entries(self):
1887
"""Grabs the next specific file parent case to consider.
1889
:return: A list of the results, each of which is as for _process_entry.
1892
while self.search_specific_file_parents:
1893
# Process the parent directories for the paths we were iterating.
1894
# Even in extremely large trees this should be modest, so currently
1895
# no attempt is made to optimise.
1896
path_utf8 = self.search_specific_file_parents.pop()
1897
if path_utf8 in self.searched_exact_paths:
1898
# We've examined this path.
1900
if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1901
# We've examined this path.
1903
path_entries = self.state._entries_for_path(path_utf8)
1904
# We need either one or two entries. If the path in
1905
# self.target_index has moved (so the entry in source_index is in
1906
# 'ar') then we need to also look for the entry for this path in
1907
# self.source_index, to output the appropriate delete-or-rename.
1908
selected_entries = []
1910
for candidate_entry in path_entries:
1911
# Find entries present in target at this path:
1912
if candidate_entry[1][self.target_index][0] not in 'ar':
1914
selected_entries.append(candidate_entry)
1915
# Find entries present in source at this path:
1916
elif (self.source_index is not None and
1917
candidate_entry[1][self.source_index][0] not in 'ar'):
1919
if candidate_entry[1][self.target_index][0] == 'a':
1920
# Deleted, emit it here.
1921
selected_entries.append(candidate_entry)
1923
# renamed, emit it when we process the directory it
1925
self.search_specific_file_parents.add(
1926
candidate_entry[1][self.target_index][1])
1928
raise AssertionError(
1929
"Missing entry for specific path parent %r, %r" % (
1930
path_utf8, path_entries))
1931
path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1932
for entry in selected_entries:
1933
if entry[0][2] in self.seen_ids:
1935
result, changed = self._process_entry(entry, path_info)
1937
raise AssertionError(
1938
"Got entry<->path mismatch for specific path "
1939
"%r entry %r path_info %r " % (
1940
path_utf8, entry, path_info))
1941
# Only include changes - we're outside the users requested
1944
self._gather_result_for_consistency(result)
1945
if (result[6][0] == 'directory' and
1946
result[6][1] != 'directory'):
1947
# This stopped being a directory, the old children have
1949
if entry[1][self.source_index][0] == 'r':
1950
# renamed, take the source path
1951
entry_path_utf8 = entry[1][self.source_index][1]
1953
entry_path_utf8 = path_utf8
1954
initial_key = (entry_path_utf8, '', '')
1955
block_index, _ = self.state._find_block_index_from_key(
1957
if block_index == 0:
1958
# The children of the root are in block index 1.
1959
block_index = block_index + 1
1960
current_block = None
1961
if block_index < len(self.state._dirblocks):
1962
current_block = self.state._dirblocks[block_index]
1963
if not osutils.is_inside(
1964
entry_path_utf8, current_block[0]):
1965
# No entries for this directory at all.
1966
current_block = None
1967
if current_block is not None:
1968
for entry in current_block[1]:
1969
if entry[1][self.source_index][0] in 'ar':
1970
# Not in the source tree, so doesn't have to be
1973
# Path of the entry itself.
1974
self.search_specific_file_parents.add(
1975
self.pathjoin(*entry[0][:2]))
1976
if changed or self.include_unchanged:
1977
results.append((result, changed))
1978
self.searched_exact_paths.add(path_utf8)
1981
cdef object _path_info(self, utf8_path, unicode_path):
1982
"""Generate path_info for unicode_path.
1984
:return: None if unicode_path does not exist, or a path_info tuple.
1986
abspath = self.tree.abspath(unicode_path)
1988
stat = os.lstat(abspath)
1990
if e.errno == errno.ENOENT:
1991
# the path does not exist.
1995
utf8_basename = utf8_path.rsplit('/', 1)[-1]
1996
dir_info = (utf8_path, utf8_basename,
1997
osutils.file_kind_from_stat_mode(stat.st_mode), stat,
1999
if dir_info[2] == 'directory':
2000
if self.tree._directory_is_tree_reference(
2002
self.root_dir_info = self.root_dir_info[:2] + \
2003
('tree-reference',) + self.root_dir_info[3:]