~bzr-pqm/bzr/bzr.dev

1739.2.6 by Robert Collins
Merge bzr.dev
1
# Copyright (C) 2006, 2008 Canonical Ltd
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1739.2.7 by Robert Collins
Update readdir pyrex source files and usage in line with current practice.
17
"""Wrapper for readdir which returns files ordered by inode."""
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
18
19
20
import os
21
import sys
22
3731.1.1 by Robert Collins
* The C extensions now build on python 2.4 (Robert Collins, #271939)
23
#python2.4 support
24
cdef extern from "python-compat.h":
25
    pass
26
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
27
28
# errno and the related error-reporting names from the C library.
29
cdef extern from 'errno.h':
30
    int ENOENT
1739.2.6 by Robert Collins
Merge bzr.dev
31
    int ENOTDIR
32
    int EAGAIN
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
33
    int errno
34
    char *strerror(int errno)
35
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
36
cdef extern from 'unistd.h':
37
    int chdir(char *path)
38
    char *getcwd(char *, int size)
39
40
cdef extern from 'stdlib.h':
41
    void *malloc(int)
42
    void free(void *)
43
3696.3.10 by Robert Collins
Review feedback.
44
45
cdef extern from 'sys/types.h':
46
    ctypedef long ssize_t
47
    ctypedef unsigned long size_t
48
    ctypedef long time_t
49
    ctypedef unsigned long ino_t
50
    ctypedef unsigned long long off_t
51
52
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
53
cdef extern from 'sys/stat.h':
54
    cdef struct stat:
55
        int st_mode
3696.3.10 by Robert Collins
Review feedback.
56
        off_t st_size
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
57
        int st_dev
3696.3.10 by Robert Collins
Review feedback.
58
        ino_t st_ino
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
59
        int st_mtime
60
        int st_ctime
61
    int lstat(char *path, stat *buf)
62
    int S_ISDIR(int mode)
63
    int S_ISCHR(int mode)
64
    int S_ISBLK(int mode)
65
    int S_ISREG(int mode)
66
    int S_ISFIFO(int mode)
67
    int S_ISLNK(int mode)
68
    int S_ISSOCK(int mode)
69
70
71
cdef extern from 'Python.h':
72
    char * PyString_AS_STRING(object)
73
    ctypedef int Py_ssize_t # Required for older pyrex versions
3696.3.7 by Robert Collins
Use PyString_Concat directly for another small boost.
74
    ctypedef struct PyObject:
75
        pass
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
76
    Py_ssize_t PyString_Size(object s)
77
    object PyList_GetItem(object lst, Py_ssize_t index)
78
    void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)
79
    int PyList_Append(object lst, object item) except -1
80
    void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
81
    int PyTuple_SetItem(void *, Py_ssize_t pos, object item) except -1
3696.3.7 by Robert Collins
Use PyString_Concat directly for another small boost.
82
    int PyTuple_SetItem_obj "PyTuple_SetItem" (void *, Py_ssize_t pos, PyObject * item) except -1
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
83
    void Py_INCREF(object o)
84
    void Py_DECREF(object o)
3696.3.7 by Robert Collins
Use PyString_Concat directly for another small boost.
85
    void PyString_Concat(PyObject **string, object newpart)
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
86
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
87
88
cdef extern from 'dirent.h':
89
    ctypedef struct dirent:
90
        char d_name[256]
3696.3.6 by Robert Collins
Partial review feedback fixups.
91
        ino_t d_ino
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
92
    ctypedef struct DIR
93
    # should be DIR *, pyrex barfs.
1739.2.6 by Robert Collins
Merge bzr.dev
94
    DIR * opendir(char * name)
95
    int closedir(DIR * dir)
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
96
    dirent *readdir(DIR *dir)
97
98
_directory = 'directory'
99
_chardev = 'chardev'
100
_block = 'block'
101
_file = 'file'
102
_fifo = 'fifo'
103
_symlink = 'symlink'
104
_socket = 'socket'
105
_unknown = 'unknown'
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
106
_missing = 'missing'
1739.2.3 by Robert Collins
Add a replacement for os.listdir which returns file kind information from readdir when it is available. This drops our osutils.walkdirs time further, down to 77ms.
107
108
# readdir.h adds 'typedef struct dirent dirent' to work around pyrex
# emitting the bare type name 'dirent' in the generated C code.
109
cdef extern from 'readdir.h':
110
    pass
111
3696.3.5 by Robert Collins
Streamline _walkdirs_utf8 for utf8 file systems, reducing time to traverse a mozilla tree from 1s to .6 seconds. (Robert Collins)
112
113
cdef class _Stat:
    """Represent a 'stat' result.

    The C ``struct stat`` is embedded directly in the object, and a field is
    only converted to a Python value when its property is read.  Only the
    fields declared for ``struct stat`` in this file are exposed.
    """

    # Raw lstat() output; _read_dir fills it in place via &statvalue._st.
    cdef stat _st

    property st_dev:
        def __get__(self):
            return self._st.st_dev

    property st_ino:
        def __get__(self):
            return self._st.st_ino

    property st_mode:
        def __get__(self):
            return self._st.st_mode

    property st_ctime:
        def __get__(self):
            return self._st.st_ctime

    property st_mtime:
        def __get__(self):
            return self._st.st_mtime

    property st_size:
        def __get__(self):
            return self._st.st_size

    def __repr__(self):
        """Repr is laid out like the repr of a stat result tuple.

        (mode, ino, dev, nlink, uid, gid, size, None(atime), mtime, ctime)

        The ino, dev, nlink, uid and gid slots are always reported as 0 and
        the atime slot as None.
        """
        # mtime/ctime are read through the public properties: the class has
        # no _mtime/_ctime attributes, only the embedded _st struct.
        return repr((self.st_mode, 0, 0, 0, 0, 0, self.st_size, None,
                     self.st_mtime, self.st_ctime))
149
150
151
from bzrlib import osutils
152
153
154
cdef class UTF8DirReader:
    """Directory reader for file systems whose names are already utf8.

    The kind strings and the utf8 coercion helper are cached on the instance
    when it is constructed.
    """

    cdef readonly object _safe_utf8
    cdef _directory, _chardev, _block, _file, _fifo, _symlink
    cdef _socket, _unknown

    def __init__(self):
        self._safe_utf8 = osutils.safe_utf8
        # Keep references to the module level kind strings on the instance.
        self._file = _file
        self._directory = _directory
        self._symlink = _symlink
        self._chardev = _chardev
        self._block = _block
        self._fifo = _fifo
        self._socket = _socket
        self._unknown = _unknown

    def kind_from_mode(self, int mode):
        """Return the kind string matching a stat mode value."""
        kind = self._kind_from_mode(mode)
        return kind

    cdef _kind_from_mode(self, int mode):
        # Regular files and directories are by far the most common, so they
        # are tested before the rarer kinds.
        if S_ISREG(mode):
            kind = self._file
        elif S_ISDIR(mode):
            kind = self._directory
        elif S_ISCHR(mode):
            kind = self._chardev
        elif S_ISBLK(mode):
            kind = self._block
        elif S_ISLNK(mode):
            kind = self._symlink
        elif S_ISFIFO(mode):
            kind = self._fifo
        elif S_ISSOCK(mode):
            kind = self._socket
        else:
            kind = self._unknown
        return kind

    def top_prefix_to_starting_dir(self, top, prefix=""):
        """See DirReader.top_prefix_to_starting_dir."""
        utf8_prefix = self._safe_utf8(prefix)
        utf8_top = self._safe_utf8(top)
        return (utf8_prefix, None, None, None, utf8_top)

    def read_dir(self, prefix, top):
        """Read one directory of a utf8 file system.

        Every path passed in and returned is utf8; because the file system
        is known to use utf8 already, names are never transcoded.

        See DirReader.read_dir for details.

        :param prefix: path of the directory relative to the walk root; an
            empty string means no leading component is added to names.
        :param top: path handed to _read_dir to list the directory.
        :return: the list built by _read_dir, with each tuple rewritten in
            place to (relpath, name, kind, statvalue, abspath).
        """
        cdef object kind
        cdef int pos
        cdef int count
        cdef void * entry
        cdef object name
        cdef PyObject * joined

        relprefix = ''
        if PyString_Size(prefix):
            relprefix = prefix + '/'
        top_slash = top + '/'

        # _read_dir hands the entries back in the order they should be
        # stat'ed, so the list is deliberately left unsorted.
        result = _read_dir(top)
        count = len(result)
        for pos from 0 <= pos < count:
            # Each entry arrives as (inode, name, None, statvalue, None) and
            # is referenced only by the list, so it can be edited in place.
            entry = PyList_GetItem_object_void(result, pos)
            name = <object>PyTuple_GetItem_void_void(entry, 1)

            # Slot 0: inode -> path relative to the walk root.  The strings
            # are concatenated directly, which is faster than operator +.
            # PyString_Concat consumes a reference to its left operand,
            # hence the INCREF on the shared prefix string.
            joined = <PyObject *>relprefix
            Py_INCREF(relprefix)
            PyString_Concat(&joined, name)
            if joined == NULL:
                # PyString_Concat has set an exception that is not
                # reachable from here, so a generic one is raised instead.
                raise Exception("failed to strcat")
            PyTuple_SetItem_obj(entry, 0, joined)

            # Slot 2: first None -> kind derived from the stat mode.  The
            # tuple takes over one reference, so add one before storing.
            kind = self._kind_from_mode(
                (<_Stat>PyTuple_GetItem_void_void(entry, 3)).st_mode)
            Py_INCREF(kind)
            PyTuple_SetItem(entry, 2, kind)

            # Slot 4: second None -> top + '/' + name, set for every entry
            # because the caller may need to stat files etc.
            joined = <PyObject *>top_slash
            Py_INCREF(top_slash)
            PyString_Concat(&joined, name)
            if joined == NULL:
                # Same situation as for slot 0 above.
                raise Exception("failed to strcat")
            PyTuple_SetItem_obj(entry, 4, joined)
        return result
262
263
264
cdef _read_dir(path):
    """Like os.listdir, this reads the contents of a directory.

    The process changes into ``path`` while reading, so lstat can be called
    with bare entry names, and changes back to the original working
    directory before returning or raising.  Because the working directory
    is process wide, other threads see the change while it is in effect.

    :param path: the directory to list.
    :return: a list of single-owner (the list) tuples ready for editing into
        the result tuples walkdirs needs to yield. They contain (inode, name,
        None, statvalue, None).  '.' and '..' are never included, and an
        entry that disappears between readdir and lstat is left out.
    :raises OSError: if the working directory cannot be read, changed or
        restored, if the directory cannot be opened, read or closed, or if
        lstat fails with anything other than ENOENT.
    """
    cdef DIR *the_dir
    # currently this needs a fixup - the C code says 'dirent' but should say
    # 'struct dirent'
    cdef dirent * entry
    cdef char *name
    cdef int stat_result
    cdef int saved_errno
    cdef _Stat statvalue
    cdef char *cwd
    # errno is assigned below, so it must be declared global to refer to the
    # C variable rather than creating a local.
    global errno

    cwd = getcwd(NULL, 0)
    if NULL == cwd:
        raise OSError(errno, strerror(errno))
    if -1 == chdir(path):
        # Capture errno before free() gets a chance to disturb it, and do
        # not leak the buffer allocated by getcwd.
        saved_errno = errno
        free(cwd)
        raise OSError(saved_errno, strerror(saved_errno))
    try:
        the_dir = opendir(".")
        if NULL == the_dir:
            raise OSError(errno, strerror(errno))
        result = []
        try:
            while True:
                # readdir returns NULL both at end of directory and on
                # error; only a freshly cleared errno tells them apart.
                errno = 0
                entry = readdir(the_dir)
                if entry == NULL:
                    if errno == EAGAIN:
                        # try again
                        continue
                    if errno != ENOTDIR and errno != ENOENT and errno != 0:
                        raise OSError(errno, strerror(errno))
                    # End of directory.  ENOTDIR has been seen at the end of
                    # a normal directory (read_dir(file) already fails in
                    # opendir), and ENOENT is listed as 'invalid position in
                    # the dir stream' for readdir; both are treated as the
                    # end of the listing.
                    break
                name = entry.d_name
                if name[0] == c"." and (
                    (name[1] == 0) or
                    (name[1] == c"." and name[2] == 0)):
                    # Skip the '.' and '..' entries.
                    continue
                statvalue = _Stat()
                stat_result = lstat(name, &statvalue._st)
                if stat_result != 0:
                    if errno != ENOENT:
                        raise OSError(errno, strerror(errno))
                    # The entry vanished after readdir reported it.  There is
                    # no stat value to return and read_dir needs one for
                    # every tuple, so the entry is left out.
                    continue
                # We append a 5-tuple that can be modified in-place by the C
                # api:
                # inode to sort on (to replace with top_path)
                # name (to keep)
                # kind (None, to set)
                # statvalue (to keep)
                # abspath (None, to set)
                PyList_Append(result, (entry.d_ino, entry.d_name, None,
                    statvalue, None))
        finally:
            # Always release the DIR handle, even when reading failed.
            if -1 == closedir(the_dir):
                raise OSError(errno, strerror(errno))
    finally:
        # Restore the original working directory and release the getcwd
        # buffer on every exit path.
        saved_errno = 0
        if -1 == chdir(cwd):
            saved_errno = errno
        free(cwd)
        if saved_errno != 0:
            raise OSError(saved_errno, strerror(saved_errno))
    return result
1739.2.6 by Robert Collins
Merge bzr.dev
337
338
339
# vim: tw=79 ai expandtab sw=4 sts=4