223
224
"""This just keeps track of information as we are bisecting."""
227
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
    """Convert stat values into a packed representation.

    Six fields of ``st`` (size, mtime, ctime, dev, ino, mode) are packed as
    big-endian unsigned 32-bit integers and the resulting 24 bytes are
    base64 encoded.

    :param st: A stat-like object providing st_size, st_mtime, st_ctime,
        st_dev, st_ino and st_mode.
    :param _encode: Encoder applied to the packed bytes. Its output must end
        with a single trailing character (a newline for the default), which
        is stripped from the result.
    :param _pack: Packing function called with the format string and the six
        field values.
    :return: The encoded stat fingerprint without the trailing newline
        (32 characters with the default encoder).
    """
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin
    #
    # Alternatives that were measured: packing only mtime and mode with
    # '>LL' was 0.060s / 1.520s faster, and formatting the fields with
    # '%X' was not strictly faster than _encode(_pack())[:-1].

    # Every field is reduced to its low 32 bits before packing.  '>L' only
    # accepts values in range(2**32), so a file larger than 4GiB, a wide
    # device or inode number, or a timestamp before the epoch would otherwise
    # make the pack call fail.  Values that already fit are unchanged.
    mask = 0xFFFFFFFF
    packed = _pack('>LLLLLL',
                   st.st_size & mask,
                   int(st.st_mtime) & mask,
                   int(st.st_ctime) & mask,
                   st.st_dev & mask,
                   st.st_ino & mask,
                   st.st_mode & mask)
    # base64 encoding always adds a final newline, so strip it off
    return _encode(packed)[:-1]
226
250
class DirState(object):
227
251
"""Record directory and metadata state for fast access.
1062
def update_entry(self, entry, abspath, stat_value=None):
1091
def update_entry(self, entry, abspath, stat_value,
1092
_stat_to_minikind=_stat_to_minikind,
1093
_pack_stat=pack_stat):
1063
1094
"""Update the entry based on what is actually on disk.
1065
1096
:param entry: This is the dirblock entry for the file in question.
1069
1100
:return: The sha1 hexdigest of the file (40 bytes) or link target of a
1072
# This code assumes that the entry passed in is directly held in one of
1073
# the internal _dirblocks. So the dirblock state must have already been
1075
assert self._dirblock_state != DirState.NOT_IN_MEMORY
1076
if stat_value is None:
1078
# We could inline os.lstat but the common case is that
1079
# stat_value will be passed in, not read here.
1080
stat_value = self._lstat(abspath, entry)
1081
except (OSError, IOError), e:
1082
if e.errno in (errno.ENOENT, errno.EACCES,
1084
# The entry is missing, consider it gone
1088
kind = osutils.file_kind_from_stat_mode(stat_value.st_mode)
1090
minikind = DirState._kind_to_minikind[kind]
1091
except KeyError: # Unknown kind
1104
minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
1093
packed_stat = pack_stat(stat_value)
1108
packed_stat = _pack_stat(stat_value)
1094
1109
(saved_minikind, saved_link_or_sha1, saved_file_size,
1095
1110
saved_executable, saved_packed_stat) = entry[1][0]
1097
1112
if (minikind == saved_minikind
1098
and packed_stat == saved_packed_stat
1113
and packed_stat == saved_packed_stat):
1114
# The stat hasn't changed since we saved, so we can re-use the
1099
1119
# size should also be in packed_stat
1100
and saved_file_size == stat_value.st_size):
1101
# The stat hasn't changed since we saved, so we can potentially
1102
# re-use the saved sha hash.
1106
if self._cutoff_time is None:
1107
self._sha_cutoff_time()
1109
if (stat_value.st_mtime < self._cutoff_time
1110
and stat_value.st_ctime < self._cutoff_time):
1111
# Return the existing fingerprint
1120
if saved_file_size == stat_value.st_size:
1112
1121
return saved_link_or_sha1
1114
1123
# If we have gotten this far, that means that we need to actually
1118
1127
link_or_sha1 = self._sha1_file(abspath, entry)
1119
1128
executable = self._is_executable(stat_value.st_mode,
1120
1129
saved_executable)
1121
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
1122
executable, packed_stat)
1130
if self._cutoff_time is None:
1131
self._sha_cutoff_time()
1132
if (stat_value.st_mtime < self._cutoff_time
1133
and stat_value.st_ctime < self._cutoff_time):
1134
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
1135
executable, packed_stat)
1137
entry[1][0] = ('f', '', stat_value.st_size,
1138
executable, DirState.NULLSTAT)
1123
1139
elif minikind == 'd':
1124
1140
link_or_sha1 = None
1125
1141
entry[1][0] = ('d', '', 0, False, packed_stat)
1133
1149
osutils.pathjoin(entry[0][0], entry[0][1]))
1134
1150
elif minikind == 'l':
1135
1151
link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
1136
entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
1152
if self._cutoff_time is None:
1153
self._sha_cutoff_time()
1154
if (stat_value.st_mtime < self._cutoff_time
1155
and stat_value.st_ctime < self._cutoff_time):
1156
entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
1159
entry[1][0] = ('l', '', stat_value.st_size,
1160
False, DirState.NULLSTAT)
1138
1161
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1139
1162
return link_or_sha1
1900
1922
self._header_state = DirState.IN_MEMORY_MODIFIED
1901
1923
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1902
1924
self._id_index = id_index
1905
1926
def _sort_entries(self, entry_list):
1906
1927
"""Given a list of entries, sort them into the right order.
2380
2401
if cur_split < dirname_split: lo = mid+1
2386
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
    """Convert stat values into a packed representation.

    Six fields of ``st`` (size, mtime, ctime, dev, ino, mode) are packed as
    big-endian unsigned 32-bit integers and the resulting 24 bytes are
    base64 encoded.

    :param st: A stat-like object providing st_size, st_mtime, st_ctime,
        st_dev, st_ino and st_mode.
    :param _encode: Encoder applied to the packed bytes. Its output must end
        with a single trailing character (a newline for the default), which
        is stripped from the result. The default is binascii.b2a_base64,
        which yields the same text as base64.encodestring for this 24 byte
        payload; encodestring is deprecated and absent from Python 3.9+.
    :param _pack: Packing function called with the format string and the six
        field values.
    :return: The encoded stat fingerprint without the trailing newline
        (32 characters with the default encoder).
    """
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin

    # Every field is reduced to its low 32 bits before packing.  '>L' only
    # accepts values in range(2**32), so a file larger than 4GiB, a wide
    # device or inode number, or a timestamp before the epoch would otherwise
    # make the pack call fail.  Values that already fit are unchanged.
    mask = 0xFFFFFFFF
    packed = _pack('>LLLLLL',
                   st.st_size & mask,
                   int(st.st_mtime) & mask,
                   int(st.st_ctime) & mask,
                   st.st_dev & mask,
                   st.st_ino & mask,
                   st.st_mode & mask)
    # base64 encoding always adds a final newline, so strip it off
    return _encode(packed)[:-1]