1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
|
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Helper functions for Walkdirs on win32."""
cdef extern from "_walkdirs_win32.h":
    # Declarations of the win32 types, constants and functions this module
    # uses. An extern block only tells Pyrex which names exist and what
    # their types are; the definitions themselves come from the C header.

    # Opaque search handle; only ever passed around, never dereferenced here.
    struct _HANDLE:
        pass
    ctypedef _HANDLE *HANDLE
    ctypedef unsigned long DWORD
    ctypedef long long __int64
    ctypedef unsigned short WCHAR

    # The two DWORD halves of one 64-bit timestamp value.
    struct _FILETIME:
        DWORD dwHighDateTime
        DWORD dwLowDateTime
    ctypedef _FILETIME FILETIME

    # One directory entry as filled in by FindFirstFileW / FindNextFileW.
    struct _WIN32_FIND_DATAW:
        DWORD dwFileAttributes
        FILETIME ftCreationTime
        FILETIME ftLastAccessTime
        FILETIME ftLastWriteTime
        DWORD nFileSizeHigh
        DWORD nFileSizeLow
        # Some reserved stuff here
        WCHAR cFileName[260] # MAX_PATH
        # Spelled as in the Windows header (capital 'N'); with any other
        # spelling an access to this member would not compile as C.
        WCHAR cAlternateFileName[14]

    # We have to use the typedef trick, otherwise pyrex uses:
    #  struct WIN32_FIND_DATAW
    # which fails due to 'incomplete type'
    ctypedef _WIN32_FIND_DATAW WIN32_FIND_DATAW

    # Directory enumeration API. FindFirstFileW returns INVALID_HANDLE_VALUE
    # on failure; this module treats a zero return from FindNextFileW and
    # FindClose as "failed / nothing more".
    HANDLE INVALID_HANDLE_VALUE
    HANDLE FindFirstFileW(WCHAR *path, WIN32_FIND_DATAW *data)
    int FindNextFileW(HANDLE search, WIN32_FIND_DATAW *data)
    int FindClose(HANDLE search)

    # Bit flags tested against dwFileAttributes.
    DWORD FILE_ATTRIBUTE_READONLY
    DWORD FILE_ATTRIBUTE_DIRECTORY

    # Error reporting: ERROR_NO_MORE_FILES is the expected "error" once an
    # enumeration has been exhausted.
    int ERROR_NO_MORE_FILES
    int GetLastError()

    # Wide character functions
    DWORD wcslen(WCHAR *)
cdef extern from "Python.h":
    # CPython C-API entry points that this module calls directly.

    # Pointer to the internal character buffer of a unicode object. It is
    # declared as WCHAR* here, which presumes Py_UNICODE and WCHAR have the
    # same width on this platform -- TODO confirm for the targeted builds.
    WCHAR *PyUnicode_AS_UNICODE(object)
    # Length of a unicode object.
    Py_ssize_t PyUnicode_GET_SIZE(object)
    # Create a new unicode object from a character buffer and a length.
    object PyUnicode_FromUnicode(WCHAR *, Py_ssize_t)
    # 'except -1' makes Pyrex propagate the Python exception when the call
    # returns -1.
    int PyList_Append(object, object) except -1
    # Encode a unicode object into a UTF-8 byte string.
    object PyUnicode_AsUTF8String(object)
import operator
import stat
from bzrlib import osutils
cdef class _Win32Stat:
    """Stat-like record filled in from a WIN32_FIND_DATA entry.

    All attributes are read-only from Python code.
    """

    # File type and permission bits.
    cdef readonly int st_mode
    # Creation, last-write and last-access times as floating point values.
    cdef readonly double st_ctime
    cdef readonly double st_mtime
    cdef readonly double st_atime
    # File size as a 64-bit integer.
    cdef readonly __int64 st_size
    # Hard-coded, because os.stat always reports 0 for these two fields.
    cdef readonly int st_dev
    cdef readonly int st_ino

    def __repr__(self):
        """Render like a stat result tuple.

        The layout is
        (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime)
        where ino, dev, nlink, uid and gid are always shown as 0.
        """
        as_tuple = (self.st_mode, 0, 0, 0, 0, 0, self.st_size,
                    self.st_atime, self.st_mtime, self.st_ctime)
        return repr(as_tuple)
cdef object _get_name(WIN32_FIND_DATAW *data):
    """Return the entry's file name as a Python unicode object.

    The length is measured with wcslen(), so the name ends at the first
    NUL character in cFileName.
    """
    cdef DWORD name_length

    name_length = wcslen(data.cFileName)
    return PyUnicode_FromUnicode(data.cFileName, name_length)
cdef int _get_mode_bits(WIN32_FIND_DATAW *data):
    """Translate the win32 file attributes into a stat-style st_mode value.

    The starting point is a writeable regular file (0100666), the most
    common case; the READONLY and DIRECTORY attributes then adjust it.
    """
    cdef int mode_bits
    cdef DWORD attributes

    attributes = data.dwFileAttributes
    mode_bits = 0100666
    if (attributes & FILE_ATTRIBUTE_READONLY) == FILE_ATTRIBUTE_READONLY:
        # All three write bits are set at this point, so XOR clears them.
        mode_bits = mode_bits ^ 0222
    if (attributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY:
        # One XOR drops the FILE bit, turns on the DIR bit and turns on the
        # three EXEC bits.
        mode_bits = mode_bits ^ 0140111
    return mode_bits
cdef __int64 _get_size(WIN32_FIND_DATAW *data):
    """Combine nFileSizeHigh and nFileSizeLow into one 64-bit file size."""
    cdef __int64 high_part

    # Widen the high DWORD to 64 bits before shifting, so that the upper
    # half is not lost by the shift.
    high_part = data.nFileSizeHigh
    return (high_part << 32) + data.nFileSizeLow
cdef double _ftime_to_timestamp(FILETIME *ft):
    """Convert a FILETIME struct into a floating point timestamp.

    The two fields of a FILETIME are the high and low halves of a 64-bit
    count of 100 nanosecond units, measured from the epoch 1601-01-01
    rather than 1970-01-01. 1e7 such units make one second, so the count is
    scaled by 1e-7 and then shifted by the distance between the two epochs.
    """
    cdef __int64 ticks
    cdef __int64 high_part

    # Reassemble the 64-bit tick count from its two DWORD halves.
    high_part = ft.dwHighDateTime
    ticks = (high_part << 32) + ft.dwLowDateTime
    # secs between epochs: 11,644,473,600
    return (ticks * 1.0e-7) - 11644473600.0
cdef int _should_skip(WIN32_FIND_DATAW *data):
    """Return 1 when the entry is named '.' or '..', otherwise 0."""
    if data.cFileName[0] == c'.':
        if data.cFileName[1] == c'\0':
            # Exactly '.'
            return 1
        if data.cFileName[1] == c'.' and data.cFileName[2] == c'\0':
            # Exactly '..'
            return 1
    # Any other name, including ones that merely start with a dot.
    return 0
cdef class Win32Finder:
    """Iterator that walks a directory tree using the win32 FindFile API.

    Every step produces ``((relroot, top), dirblock)`` for one directory,
    where dirblock is the sorted list described in _get_files_in().
    """

    # The arguments exactly as they were given to __init__.
    cdef object _top
    cdef object _prefix

    # Cached 'kind' values used to label the entries.
    cdef object _directory_kind
    cdef object _file_kind

    # Stack of (utf8_relpath, unicode_abspath) directories still to visit.
    cdef object _pending
    # The dirblock handed out by the previous step, or None.
    cdef object _last_dirblock

    def __init__(self, top, prefix=""):
        """Prepare a walk rooted at 'top'.

        :param top: The directory to start from.
        :param prefix: The relative path reported for 'top' itself.
        """
        self._top = top
        self._prefix = prefix

        self._directory_kind = osutils._directory_kind
        self._file_kind = osutils._formats[stat.S_IFREG]

        utf8_prefix = osutils.safe_utf8(prefix)
        unicode_top = osutils.safe_unicode(top)
        self._pending = [(utf8_prefix, unicode_top)]
        self._last_dirblock = None

    def __iter__(self):
        return self

    cdef object _get_kind(self, WIN32_FIND_DATAW *data):
        """Return the directory kind or the file kind for this entry."""
        if not (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY):
            return self._file_kind
        return self._directory_kind

    cdef _Win32Stat _get_stat_value(self, WIN32_FIND_DATAW *data):
        """Build the _Win32Stat object describing this entry."""
        cdef _Win32Stat st

        st = _Win32Stat()
        # Not provided by the find data; always reported as 0.
        st.st_dev = 0
        st.st_ino = 0
        st.st_mode = _get_mode_bits(data)
        st.st_size = _get_size(data)
        st.st_atime = _ftime_to_timestamp(&data.ftLastAccessTime)
        st.st_mtime = _ftime_to_timestamp(&data.ftLastWriteTime)
        st.st_ctime = _ftime_to_timestamp(&data.ftCreationTime)
        return st

    def _get_files_in(self, directory, relprefix):
        """Return the dirblock for all files in the given directory.

        :param directory: A path that can directly access the files on disk.
            Should be a Unicode object.
        :param relprefix: A pseudo path for these files (as inherited from
            the original 'prefix=XXX' when instantiating this class.)
            It should be a UTF-8 string.
        :return: A dirblock for all the files of the form
            [(utf8_relpath, utf8_fname, kind, _Win32Stat, unicode_abspath)]
        :raises WindowsError: if the search cannot be started, or if it
            stops for any reason other than running out of entries.
        """
        cdef WIN32_FIND_DATAW entry
        cdef HANDLE search_handle
        cdef WCHAR *pattern_chars
        cdef int has_entry
        cdef int error_code
        cdef int close_result

        pattern = directory + '*'
        entries = []

        pattern_chars = PyUnicode_AS_UNICODE(pattern)
        search_handle = FindFirstFileW(pattern_chars, &entry)
        if search_handle == INVALID_HANDLE_VALUE:
            # The search could not even be started; this path doesn't seem
            # to exist.
            raise WindowsError(GetLastError(), pattern)

        try:
            # FindFirstFileW already filled in the first entry.
            has_entry = 1
            while has_entry:
                # '.' and '..' are never reported.
                if not _should_skip(&entry):
                    name_unicode = _get_name(&entry)
                    name_utf8 = PyUnicode_AsUTF8String(name_unicode)
                    PyList_Append(entries,
                        (relprefix + name_utf8, name_utf8,
                         self._get_kind(&entry),
                         self._get_stat_value(&entry),
                         directory + name_unicode))
                has_entry = FindNextFileW(search_handle, &entry)
            # Running out of entries is reported through GetLastError() as
            # ERROR_NO_MORE_FILES. Any other code is a genuine problem.
            error_code = GetLastError()
            if error_code != ERROR_NO_MORE_FILES:
                raise WindowsError(error_code)
        finally:
            close_result = FindClose(search_handle)
            if close_result == 0:
                error_code = GetLastError()
                # TODO: We should probably raise an exception if FindClose
                #       returns an error. That could suppress an earlier
                #       exception though, so for now it is ignored.
        return entries

    cdef _update_pending(self):
        """Queue the subdirectories of the previously returned dirblock.

        This is delayed until the next step because the caller is allowed
        to modify the dirblock in between, and only the entries that are
        still in it get visited.
        """
        if self._last_dirblock is None:
            return
        # Pushed in reverse, because __next__ pops from the end of the list.
        for entry in reversed(self._last_dirblock):
            if entry[2] == self._directory_kind:
                self._pending.append((entry[0], entry[-1]))
        self._last_dirblock = None

    def __next__(self):
        """Return ((relroot, top), dirblock) for the next directory."""
        self._update_pending()
        if not self._pending:
            raise StopIteration()
        relroot, top = self._pending.pop()
        # NB: At the moment Pyrex doesn't support Unicode literals, so the
        #     string literals below get upcast to Unicode at runtime
        #     whenever they are combined with a unicode path.
        relprefix = ''
        if relroot:
            relprefix = relroot + '/'
        dirblock = self._get_files_in(top + '/', relprefix)
        # Order the entries by their utf8 file name.
        dirblock.sort(key=operator.itemgetter(1))
        self._last_dirblock = dirblock
        return (relroot, top), dirblock
def _walkdirs_utf8_win32_find_file(top, prefix=""):
    """Implement a version of walkdirs_utf8 for win32.

    The find files api is used to both list the files and to stat them.

    :param top: The directory to start walking from.
    :param prefix: The relative path to report for 'top'.
    :return: A Win32Finder, which is iterated to obtain the dirblocks.
    """
    finder = Win32Finder(top, prefix=prefix)
    return finder
|