~bzr-pqm/bzr/bzr.dev

4095.1.3 by Martin Pool
Add test for failures inside pyrex readdir
1
# Copyright (C) 2006, 2008, 2009 Canonical Ltd
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
16
1739.2.7 by Robert Collins
Update readdir pyrex source files and usage in line with current practice.
17
"""Wrapper for readdir which returns files ordered by inode."""
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
18
19
20
import os
21
import sys
22
3731.1.1 by Robert Collins
* The C extensions now build on python 2.4 (Robert Collins, #271939)
23
#python2.4 support
24
cdef extern from "python-compat.h":
25
    pass
26
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
27
28
cdef extern from 'errno.h':
29
    int ENOENT
1739.2.6 by Robert Collins
Merge bzr.dev
30
    int ENOTDIR
31
    int EAGAIN
3766.1.5 by Martin Pool
add missing pyrex import
32
    int EINTR
3766.1.6 by Martin Pool
We need a 'global' declaration to assign to errno; and fix comments
33
    char *strerror(int errno)
34
    # not necessarily a real variable, but this should be close enough
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
35
    int errno
36
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
37
cdef extern from 'unistd.h':
38
    int chdir(char *path)
3841.1.4 by Martin Pool
Use open/fchdir rather than getcwd/chdir to save and restore directory location
39
    int close(int fd)
40
    int fchdir(int fd)
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
41
    char *getcwd(char *, int size)
42
43
cdef extern from 'stdlib.h':
44
    void *malloc(int)
45
    void free(void *)
46
3696.3.10 by Robert Collins
Review feedback.
47
48
cdef extern from 'sys/types.h':
49
    ctypedef long ssize_t
50
    ctypedef unsigned long size_t
51
    ctypedef long time_t
52
    ctypedef unsigned long ino_t
53
    ctypedef unsigned long long off_t
3841.1.4 by Martin Pool
Use open/fchdir rather than getcwd/chdir to save and restore directory location
54
    ctypedef int mode_t
3696.3.10 by Robert Collins
Review feedback.
55
56
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
57
cdef extern from 'sys/stat.h':
58
    cdef struct stat:
59
        int st_mode
3696.3.10 by Robert Collins
Review feedback.
60
        off_t st_size
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
61
        int st_dev
3696.3.10 by Robert Collins
Review feedback.
62
        ino_t st_ino
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
63
        int st_mtime
64
        int st_ctime
65
    int lstat(char *path, stat *buf)
66
    int S_ISDIR(int mode)
67
    int S_ISCHR(int mode)
68
    int S_ISBLK(int mode)
69
    int S_ISREG(int mode)
70
    int S_ISFIFO(int mode)
71
    int S_ISLNK(int mode)
72
    int S_ISSOCK(int mode)
73
74
3841.1.4 by Martin Pool
Use open/fchdir rather than getcwd/chdir to save and restore directory location
75
cdef extern from 'fcntl.h':
76
    int O_RDONLY
77
    int open(char *pathname, int flags, mode_t mode)
78
79
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
80
cdef extern from 'Python.h':
81
    char * PyString_AS_STRING(object)
82
    ctypedef int Py_ssize_t # Required for older pyrex versions
3696.3.7 by Robert Collins
Use PyString_Concat directly for another small boost.
83
    ctypedef struct PyObject:
84
        pass
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
85
    Py_ssize_t PyString_Size(object s)
86
    object PyList_GetItem(object lst, Py_ssize_t index)
87
    void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)
88
    int PyList_Append(object lst, object item) except -1
89
    void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
90
    int PyTuple_SetItem(void *, Py_ssize_t pos, object item) except -1
3696.3.7 by Robert Collins
Use PyString_Concat directly for another small boost.
91
    int PyTuple_SetItem_obj "PyTuple_SetItem" (void *, Py_ssize_t pos, PyObject * item) except -1
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
92
    void Py_INCREF(object o)
93
    void Py_DECREF(object o)
3696.3.7 by Robert Collins
Use PyString_Concat directly for another small boost.
94
    void PyString_Concat(PyObject **string, object newpart)
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
95
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
96
97
cdef extern from 'dirent.h':
98
    ctypedef struct dirent:
99
        char d_name[256]
3696.3.6 by Robert Collins
Partial review feedback fixups.
100
        ino_t d_ino
3766.1.6 by Martin Pool
We need a 'global' declaration to assign to errno; and fix comments
101
    # the opaque C library DIR type.
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
102
    ctypedef struct DIR
103
    # should be DIR *, pyrex barfs.
1739.2.6 by Robert Collins
Merge bzr.dev
104
    DIR * opendir(char * name)
105
    int closedir(DIR * dir)
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
106
    dirent *readdir(DIR *dir)
107
108
# String names for the kinds of filesystem entry this module distinguishes.
# UTF8DirReader copies most of these onto each instance in __init__ and
# returns them from _kind_from_mode.
_directory = 'directory'
_chardev = 'chardev'
_block = 'block'
_file = 'file'
_fifo = 'fifo'
_symlink = 'symlink'
_socket = 'socket'
_unknown = 'unknown'
_missing = 'missing'
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
117
118
# add a typedef struct dirent dirent to workaround pyrex
119
cdef extern from 'readdir.h':
120
    pass
121
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
122
123
cdef class _Stat:
    """Represent a 'stat' result.

    The C ``struct stat`` is embedded directly in the object; each field is
    only converted to a Python value when the matching property is read.
    """

    # Raw stat buffer; filled in place by passing its address to lstat().
    cdef stat _st

    property st_dev:
        def __get__(self):
            return self._st.st_dev

    property st_ino:
        def __get__(self):
            return self._st.st_ino

    property st_mode:
        def __get__(self):
            return self._st.st_mode

    property st_ctime:
        def __get__(self):
            return self._st.st_ctime

    property st_mtime:
        def __get__(self):
            return self._st.st_mtime

    property st_size:
        def __get__(self):
            return self._st.st_size

    def __repr__(self):
        """Repr is the same as a Stat object.

        (mode, ino, dev, nlink, uid, gid, size, None(atime), mtime, ctime)

        Only mode, size, mtime and ctime carry real values; the remaining
        slots are filled with 0 (or None for atime).
        """
        # The time fields are exposed as st_mtime / st_ctime; there are no
        # _mtime / _ctime attributes on this class.
        return repr((self.st_mode, 0, 0, 0, 0, 0, self.st_size, None,
                     self.st_mtime, self.st_ctime))
159
160
161
from bzrlib import osutils
162
163
164
cdef class UTF8DirReader:
    """A dir reader for utf8 file systems."""

    cdef readonly object _safe_utf8
    cdef _directory, _chardev, _block, _file, _fifo, _symlink
    cdef _socket, _unknown

    def __init__(self):
        # Bind the utf8 helper and the kind-name strings onto the instance;
        # the methods below read them from self.
        self._safe_utf8 = osutils.safe_utf8
        self._directory = _directory
        self._chardev = _chardev
        self._block = _block
        self._file = _file
        self._fifo = _fifo
        self._symlink = _symlink
        self._socket = _socket
        self._unknown = _unknown

    def kind_from_mode(self, int mode):
        """Get the kind of a path from a mode status."""
        return self._kind_from_mode(mode)

    cdef _kind_from_mode(self, int mode):
        """Map a stat st_mode value onto one of the kind-name strings.

        :param mode: the st_mode field of a stat result.
        :return: the matching kind name, or the 'unknown' name when none of
            the S_IS* tests match.
        """
        # Files and directories are the most common - check them first.
        if S_ISREG(mode):
            return self._file
        if S_ISDIR(mode):
            return self._directory
        if S_ISCHR(mode):
            return self._chardev
        if S_ISBLK(mode):
            return self._block
        if S_ISLNK(mode):
            return self._symlink
        if S_ISFIFO(mode):
            return self._fifo
        if S_ISSOCK(mode):
            return self._socket
        return self._unknown

    def top_prefix_to_starting_dir(self, top, prefix=""):
        """See DirReader.top_prefix_to_starting_dir."""
        return (self._safe_utf8(prefix), None, None, None,
            self._safe_utf8(top))

    def read_dir(self, prefix, top):
        """Read a single directory from a utf8 file system.

        All paths in and out are utf8.

        This sub-function is called when we know the filesystem is already in utf8
        encoding. So we don't need to transcode filenames.

        See DirReader.read_dir for details.

        :param prefix: path of the directory relative to the walk's top;
            when non-empty it is joined to each name with '/'.
        :param top: path used to reach the directory on disk.
        :return: the list produced by _read_dir, with every tuple rewritten
            in place to (path_from_top, name, kind, statvalue, abspath).
            Entries are left in the order _read_dir produced them.
        """
        cdef object newval
        cdef object statvalue
        cdef int index
        cdef int length
        cdef void * atuple
        cdef object name
        cdef PyObject * new_val_obj

        if PyString_Size(prefix):
            relprefix = prefix + '/'
        else:
            relprefix = ''
        top_slash = top + '/'

        result = _read_dir(top)
        length = len(result)
        for index from 0 <= index < length:
            # atuple is held as a bare pointer so that the list stays the
            # tuple's only owner; PyTuple_SetItem requires that.
            atuple = PyList_GetItem_object_void(result, index)
            name = <object>PyTuple_GetItem_void_void(atuple, 1)
            # We have a tuple with (inode, name, None, statvalue, None)
            # Now edit it:
            # inode -> path_from_top
            # direct concat - faster than operator +.
            # PyString_Concat consumes the reference to its left operand, so
            # take an extra one first to keep relprefix alive for the next
            # iteration.
            new_val_obj = <PyObject *>relprefix
            Py_INCREF(relprefix)
            PyString_Concat(&new_val_obj, name)
            if NULL == new_val_obj:
                # PyString_Concat will have setup an exception, but how to get
                # at it?
                raise Exception("failed to strcat")
            # PyTuple_SetItem takes over the reference to the new string.
            PyTuple_SetItem_obj(atuple, 0, new_val_obj)
            # 1st None -> kind
            statvalue = <object>PyTuple_GetItem_void_void(atuple, 3)
            if statvalue is None:
                # No stat result is available for this entry (_read_dir
                # stores None when lstat reports ENOENT); report it as
                # missing rather than failing on None.st_mode.
                newval = _missing
            else:
                newval = self._kind_from_mode((<_Stat>statvalue).st_mode)
            # The tuple takes over one reference; keep ours for the local.
            Py_INCREF(newval)
            PyTuple_SetItem(atuple, 2, newval)
            # 2nd None -> abspath # for all - the caller may need to stat files
            # etc.
            # direct concat - faster than operator +.
            new_val_obj = <PyObject *>top_slash
            Py_INCREF(top_slash)
            PyString_Concat(&new_val_obj, name)
            if NULL == new_val_obj:
                # PyString_Concat will have setup an exception, but how to get
                # at it?
                raise Exception("failed to strcat")
            PyTuple_SetItem_obj(atuple, 4, new_val_obj)
        return result
272
273
274
cdef _read_dir(path):
    """Like os.listdir, this reads the contents of a directory.

    Unless path is "" or ".", the process changes into path for the duration
    of the read so that every entry can be lstat'ed by its bare name, and
    then returns to the original directory through a saved descriptor.

    :param path: the directory to list.
    :return: a list of single-owner (the list) tuples ready for editing into
        the result tuples walkdirs needs to yield. They contain (inode, name,
        None, statvalue, None).  The "." and ".." entries are omitted, as are
        entries that disappear between readdir() and lstat().
    :raises OSError: if open, chdir, opendir, readdir, lstat (other than
        ENOENT), closedir or the return to the original directory fails.
    """
    cdef DIR *the_dir
    # currently this needs a fixup - the C code says 'dirent' but should say
    # 'struct dirent'
    cdef dirent * entry
    cdef dirent sentinel
    cdef char *name
    cdef int stat_result
    cdef _Stat statvalue
    global errno
    cdef int orig_dir_fd
    cdef int saved_errno

    # Avoid chdir('') because it causes problems on Sun OS, and avoid this if
    # staying in .
    if path != "" and path != '.':
        # we change into the requested directory before reading, and back at the
        # end, because that turns out to make the stat calls measurably faster than
        # passing full paths every time.
        orig_dir_fd = open(".", O_RDONLY, 0)
        if orig_dir_fd == -1:
            raise OSError(errno, "open: " + strerror(errno), ".")
        if -1 == chdir(path):
            # We have not entered the try block yet, so release the saved
            # descriptor here.  Remember errno first: close() may change it.
            saved_errno = errno
            close(orig_dir_fd)
            raise OSError(saved_errno, "chdir: " + strerror(saved_errno), path)
    else:
        orig_dir_fd = -1

    try:
        the_dir = opendir(".")
        if NULL == the_dir:
            raise OSError(errno, "opendir: " + strerror(errno), path)
        try:
            result = []
            # Point entry at a dummy so the loop condition is true on entry.
            entry = &sentinel
            while entry != NULL:
                # Unlike most libc functions, readdir needs errno set to 0
                # beforehand so that eof can be distinguished from errors.  See
                # <https://bugs.launchpad.net/bzr/+bug/279381>
                while True:
                    errno = 0
                    entry = readdir(the_dir)
                    if entry == NULL and (errno == EAGAIN or errno == EINTR):
                        # try again
                        continue
                    else:
                        break
                if entry == NULL:
                    if errno == ENOTDIR or errno == 0:
                        # We see ENOTDIR at the end of a normal directory.
                        # As ENOTDIR for read_dir(file) is triggered on opendir,
                        # we consider ENOTDIR to be 'no error'.
                        # entry is NULL, so this ends the outer loop.
                        continue
                    else:
                        raise OSError(errno, "readdir: " + strerror(errno), path)
                name = entry.d_name
                # Skip the "." and ".." entries.
                if not (name[0] == c"." and (
                    (name[1] == 0) or
                    (name[1] == c"." and name[2] == 0))
                    ):
                    statvalue = _Stat()
                    stat_result = lstat(entry.d_name, &statvalue._st)
                    if stat_result != 0:
                        if errno != ENOENT:
                            raise OSError(errno, "lstat: " + strerror(errno),
                                path + "/" + entry.d_name)
                        else:
                            # The entry disappeared after readdir returned
                            # it.  There is no stat value to hand back, and
                            # read_dir needs one for every tuple, so leave
                            # the entry out of the result.
                            continue
                    # We append a 5-tuple that can be modified in-place by the C
                    # api:
                    # inode to sort on (to replace with top_path)
                    # name (to keep)
                    # kind (None, to set)
                    # statvalue (to keep)
                    # abspath (None, to set)
                    PyList_Append(result, (entry.d_ino, entry.d_name, None,
                        statvalue, None))
        finally:
            if -1 == closedir(the_dir):
                raise OSError(errno, "closedir: " + strerror(errno), path)
    finally:
        if -1 != orig_dir_fd:
            # Report the first failure: a later close() must not overwrite
            # the errno left by a failed fchdir().
            saved_errno = 0
            if -1 == fchdir(orig_dir_fd):
                saved_errno = errno
            # try to close the original directory anyhow
            if -1 == close(orig_dir_fd) and saved_errno == 0:
                saved_errno = errno
            if saved_errno != 0:
                raise OSError(saved_errno,
                    "return to orig_dir: " + strerror(saved_errno))

    return result
1739.2.6 by Robert Collins
Merge bzr.dev
370
371
372
# vim: tw=79 ai expandtab sw=4 sts=4