1084
1084
_directory = _directory_kind
1085
1085
_listdir = os.listdir
1086
pending = [(prefix, "", _directory, None, top)]
1086
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
1089
1089
currentdir = pending.pop()
1090
1090
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1091
relroot = currentdir[0]
1091
1092
top = currentdir[4]
1093
relroot = currentdir[0] + '/'
1094
relprefix = relroot + u'/'
1097
top_slash = top + u'/'
1096
1098
for name in sorted(_listdir(top)):
1097
abspath = top + '/' + name
1099
abspath = top_slash + name
1098
1100
statvalue = lstat(abspath)
1099
dirblock.append((relroot + name, name,
1101
dirblock.append((relprefix + name, name,
1100
1102
file_kind_from_stat_mode(statvalue.st_mode),
1101
1103
statvalue, abspath))
1102
yield (currentdir[0], top), dirblock
1104
yield (relroot, top), dirblock
1103
1105
# push the user specified dirs from dirblock
1104
for dir in reversed(dirblock):
1105
if dir[2] == _directory:
1106
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1109
1109
def _walkdirs_utf8(top, prefix=""):
    """Yield directory listings below top with every relpath encoded in utf-8.

    This yields the same information as walkdirs() only each entry is yielded
    in utf-8. On platforms which have a filesystem encoding of utf8 the paths
    are returned as exact byte-strings.

    This function only selects an implementation: on win32, or when the
    filesystem encoding is neither utf-8 nor ascii, the work is delegated to
    _walkdirs_unicode_to_utf8; otherwise to _walkdirs_fs_utf8.

    :param top: the directory to walk; forwarded unchanged to the selected
        implementation.
    :param prefix: the relpath prefix to report for top; forwarded unchanged
        to the selected implementation.
    :return: yields a tuple of (dir_info, [file_info])
        dir_info is (utf8_relpath, path-from-top)
        file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
        if top is an absolute path, path-from-top is also an absolute path.
        path-from-top might be unicode or utf8, but it is the correct path to
        pass to os functions to affect the file in question. (such as os.lstat)
    """
    # Encoding names are case-insensitive ('utf-8' and 'UTF-8' are the same
    # codec), and the interpreter may report None when it falls back to the
    # system default, so normalise before comparing against the known names.
    fs_encoding = (sys.getfilesystemencoding() or '').upper()
    if (sys.platform == 'win32' or
        fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968')): # ascii
        # Paths have to be listed as unicode and transcoded to utf-8.
        return _walkdirs_unicode_to_utf8(top, prefix=prefix)
    # The filesystem already hands back utf-8 (or plain ascii) byte-strings.
    return _walkdirs_fs_utf8(top, prefix=prefix)
1131
def _walkdirs_fs_utf8(top, prefix=""):
1132
"""See _walkdirs_utf8.
1134
This sub-function is called when we know the filesystem is already in utf8
1135
encoding. So we don't need to transcode filenames.
1116
1137
_lstat = os.lstat
1118
1139
_directory = _directory_kind
1119
1140
_listdir = os.listdir
1120
1141
_kind_from_mode = file_kind_from_stat_mode
1121
if sys.platform == 'win32':
1122
# We need to do the listdir using unicode paths, and then encode them
1124
assert False, 'not supported yet'
1125
if sys.getfilesystemencoding() not in ('UTF-8', 'US-ASCII',
1126
'ANSI_X3.4-1968'): # ascii
1127
assert False, 'not supported yet'
1128
1142
# TODO: make these assert instead
1129
1143
if isinstance(top, unicode):
1130
1144
top = top.encode('utf8')
1131
1145
if isinstance(prefix, unicode):
1132
1146
prefix = prefix.encode('utf8')
1134
# The in-memory dirblocks should always have a prefix ending in '/'
1135
# unless the prefix is '' then it should not have a trailing slash
1136
1148
pending = [(prefix, top)]
1138
1150
relroot, top = pending.pop()
1147
1159
abspath = top_slash + name
1148
1160
statvalue = _lstat(abspath)
1149
1161
kind = _kind_from_mode(statvalue.st_mode)
1150
dirblock.append((rel_prefix + name, name, kind, statvalue, abspath))
1152
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1153
## In list/generator comprehension form. On a 55k entry tree, this form
1154
## takes 1.75s versus 1.8s. So it is saving approx 50ms. Not a huge
1155
## savings, and may not be worth the complexity. And on smaller trees,
1156
## I've seen 115ms here versus 102ms in the for loop. So it isn't
1157
## always a win. This is just left for posterity.
1158
# dirblock = [(rel_prefix + name, # relpath
1160
# _kind_from_mode(statvalue.st_mode), # kind
1162
# abspath) # path on disk
1163
# for name, abspath, statvalue in
1164
# ((name, abspath, _lstat(abspath))
1165
# for name, abspath in
1166
# ((name, top_slash + name)
1167
# for name in sorted(_listdir(top))
1162
dirblock.append((rel_prefix + name, name,
1163
kind, statvalue, abspath))
1165
yield (relroot, top), dirblock
1166
# push the user specified dirs from dirblock
1167
pending.extend((d[0], d[4])
1168
for d in reversed(dirblock)
1169
if d[2] == _directory)
1172
def _walkdirs_unicode_to_utf8(top, prefix=""):
1173
"""See _walkdirs_utf8
1175
Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
1177
This is currently the fallback code path when the filesystem encoding is
1178
not UTF-8. It may be better to implement an alternative so that we can
1179
safely handle paths that are not properly decodable in the current
1182
_utf8_encode = codecs.getencoder('utf8')
1185
_directory = _directory_kind
1186
_listdir = os.listdir
1187
_kind_from_mode = file_kind_from_stat_mode
1189
pending = [(safe_utf8(prefix), safe_unicode(top))]
1191
relroot, top = pending.pop()
1195
rel_prefix = relroot + '/'
1196
top_slash = top + u'/'
1197
# In plain for loop form
1199
for name in sorted(_listdir(top)):
1200
name_utf8 = _utf8_encode(name)[0]
1201
abspath = top_slash + name
1202
statvalue = _lstat(abspath)
1203
kind = _kind_from_mode(statvalue.st_mode)
1204
dirblock.append((rel_prefix + name_utf8, name_utf8,
1205
kind, statvalue, abspath))
1171
1206
yield (relroot, top), dirblock
1172
1207
# push the user specified dirs from dirblock
1173
1208
pending.extend((d[0], d[4])