1012
1020
def iter_all(self):
1013
1021
"""Match up the values in the different trees."""
1022
for result in self._walk_master_tree():
1024
self._finish_others()
1025
for result in self._walk_others():
1028
def _walk_master_tree(self):
1029
"""First pass, walk all trees in lock-step.
1031
When we are done, all nodes in the master_tree will have been
1032
processed. _other_walkers, _other_entries, and _others_extra will be
1033
set on 'self' for future processing.
1035
# This iterator has the most "inlining" done, because it tends to touch
1036
# every file in the tree, while the others only hit nodes that don't
1014
1038
master_iterator = self._master_tree.iter_entries_by_dir()
1016
1040
other_walkers = [other.iter_entries_by_dir()
1017
1041
for other in self._other_trees]
1018
1042
other_entries = [self._step_one(walker) for walker in other_walkers]
1020
1043
# Track extra nodes in the other trees
1021
1044
others_extra = [{} for i in xrange(len(self._other_trees))]
1023
1046
master_has_more = True
1047
step_one = self._step_one
1048
lookup_by_file_id = self._lookup_by_file_id
1049
out_of_order_processed = self._out_of_order_processed
1024
1051
while master_has_more:
1025
(master_has_more, master_path,
1026
master_ie) = self._step_one(master_iterator)
1052
(master_has_more, path, master_ie) = step_one(master_iterator)
1027
1053
if not master_has_more:
1030
1056
file_id = master_ie.file_id
1031
1057
other_values = []
1058
other_values_append = other_values.append
1032
1059
next_other_entries = []
1060
next_other_entries_append = next_other_entries.append
1033
1061
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
1034
1062
if not other_has_more:
1035
other_values.append(self._lookup_by_file_id(
1063
other_values_append(lookup_by_file_id(
1036
1064
others_extra[idx], self._other_trees[idx], file_id))
1037
next_other_entries.append((False, None, None))
1065
next_other_entries_append((False, None, None))
1038
1066
elif file_id == other_ie.file_id:
1039
# This walker matched, so consume this path, and go on to
1041
other_values.append((other_path, other_ie))
1042
next_other_entries.append(self._step_one(other_walkers[idx]))
1067
# This is the critical code path, as most of the entries
1068
# should match between most trees.
1069
other_values_append((other_path, other_ie))
1070
next_other_entries_append(step_one(other_walkers[idx]))
1044
1072
# This walker did not match, step it until it either
1045
1073
# matches, or we know we are past the current walker.
1074
other_walker = other_walkers[idx]
1075
other_extra = others_extra[idx]
1046
1076
while (other_has_more and
1047
self._cmp_path_by_dirblock(other_path, master_path) < 0):
1077
self._cmp_path_by_dirblock(other_path, path) < 0):
1048
1078
other_file_id = other_ie.file_id
1049
if other_file_id not in self._out_of_order_processed:
1050
others_extra[idx][other_file_id] = (other_path,
1079
if other_file_id not in out_of_order_processed:
1080
other_extra[other_file_id] = (other_path, other_ie)
1052
1081
other_has_more, other_path, other_ie = \
1053
self._step_one(other_walkers[idx])
1082
step_one(other_walker)
1054
1083
if other_has_more and other_ie.file_id == file_id:
1055
# We ended up walking to this point, match and continue
1056
other_values.append((other_path, other_ie))
1084
# We ended up walking to this point, match and step
1086
other_values_append((other_path, other_ie))
1057
1087
other_has_more, other_path, other_ie = \
1058
self._step_one(other_walkers[idx])
1088
step_one(other_walker)
1060
1090
# This record isn't in the normal order, see if it
1062
other_values.append(self._lookup_by_file_id(
1063
others_extra[idx], self._other_trees[idx], file_id))
1064
next_other_entries.append((other_has_more, other_path,
1092
other_values_append(lookup_by_file_id(
1093
other_extra, self._other_trees[idx], file_id))
1094
next_other_entries_append((other_has_more, other_path,
1066
1096
other_entries = next_other_entries
1068
1098
# We've matched all the walkers, yield this datapoint
1069
yield master_path, file_id, master_ie, other_values
1099
yield path, file_id, master_ie, other_values
1100
self._other_walkers = other_walkers
1101
self._other_entries = other_entries
1102
self._others_extra = others_extra
1071
# Make sure we finished iterating all the other trees
1072
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
1104
def _finish_others(self):
1105
"""Finish walking the other iterators, so we get all entries."""
1106
for idx, info in enumerate(self._other_entries):
1107
other_extra = self._others_extra[idx]
1108
(other_has_more, other_path, other_ie) = info
1073
1109
while other_has_more:
1074
1110
other_file_id = other_ie.file_id
1075
1111
if other_file_id not in self._out_of_order_processed:
1076
others_extra[idx][other_file_id] = (other_path, other_ie)
1112
other_extra[other_file_id] = (other_path, other_ie)
1077
1113
other_has_more, other_path, other_ie = \
1078
self._step_one(other_walkers[idx])
1080
# We have walked all of the master tree, now we want to find any extra
1081
# nodes in the other trees
1082
def path_key(other):
1084
dirname, basename = os.path.split(path)
1085
return (dirname.split(u'/'), basename)
1087
for idx, other_extra in enumerate(others_extra):
1088
# TODO: we could use a key=XXX rather than cmp=XXX
1089
others = sorted(other_extra.itervalues(), key=path_key)
1114
self._step_one(self._other_walkers[idx])
1115
del self._other_entries
1117
def _walk_others(self):
1118
"""Finish up by walking all the 'deferred' nodes."""
1119
# TODO: One alternative would be to grab all possible unprocessed
1120
# file_ids, and then sort by path, and then yield them. That
1121
# might ensure better ordering, in case a caller strictly
1122
# requires parents before children.
1123
for idx, other_extra in enumerate(self._others_extra):
1124
others = sorted(other_extra.itervalues(), key=self._path_key)
1090
1125
for other_path, other_ie in others:
1091
1126
file_id = other_ie.file_id
1092
1127
# We don't need to check out_of_order_processed here, because