~bzr-pqm/bzr/bzr.dev

2474.1.1 by John Arbash Meinel
Create a Pyrex extension for reading the dirstate file.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Helper functions for DirState.
18
19
This is the python implementation for DirState functions.
20
"""
21
22
from bzrlib.dirstate import DirState
23
24
2474.1.7 by John Arbash Meinel
Add some tests for a helper function that lets us
25
cdef extern from *:
26
    ctypedef int size_t
27
2474.1.4 by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.
28
cdef extern from "Python.h":
29
    # GetItem returns a borrowed reference
30
    void *PyDict_GetItem(object p, object key)
31
    int PyDict_SetItem(object p, object key, object val) except -1
32
    object PyList_GetItem(object lst, int index)
33
    object PyTuple_GetItem(object tpl, int index)
34
2474.1.7 by John Arbash Meinel
Add some tests for a helper function that lets us
35
    char* PyString_AsString(object p)
36
    int PyString_Size(object p)
37
38
39
cdef extern from "string.h":
40
    int strncmp(char *s1, char *s2, size_t len)
41
    int strcmp(char *s1, char *s2)
42
    char *strchr(char *s1, char c)
43
2474.1.4 by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.
44
45
cdef object _split_from_path(object cache, object path):
46
    """get the dirblock tuple for a given path.
47
48
    :param cache: A Dictionary mapping string paths to tuples
49
    :param path: The path we care about.
50
    :return: A borrowed reference to a tuple stored in cache.
51
        You do not need to Py_DECREF() when you are done, unless you plan on
52
        using it for a while.
53
    """
54
    cdef void* value_ptr
55
    cdef object value
56
57
    value_ptr = PyDict_GetItem(cache, path)
58
    if value_ptr == NULL:
59
        value = path.split('/')
60
        PyDict_SetItem(cache, path, value)
61
    else:
62
        value = <object>value_ptr
63
64
    return value
65
66
2474.1.5 by John Arbash Meinel
Implement explicit handling of the no-cache version, which is even faster.
67
cdef int _bisect_dirblock_nocache(object dirblocks, object dirname, int _lo, int _hi):
68
    cdef int _mid
69
    cdef object cur
70
    cdef object cur_split
71
    cdef object dirname_split
72
73
    dirname_split = dirname.split('/')
74
75
    while _lo < _hi:
76
        _mid = (_lo+_hi)/2
77
        # Grab the dirname for the current dirblock
78
        cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0)
79
        cur_split = cur.split('/')
80
        if cur_split < dirname_split: _lo = _mid+1
81
        else: _hi = _mid
82
    return _lo
83
84
2474.1.7 by John Arbash Meinel
Add some tests for a helper function that lets us
85
cdef int _cmp_dirblock_strings(char *path1, int size1, char *path2, int size2):
86
    """This compares 2 strings separating on path sections.
87
88
    This is equivalent to "cmp(path1.split('/'), path2.split('/'))"
89
    However, we don't want to create an extra object for doing the split.
90
91
    :param path1: The first path to compare
92
    :param size1: The length of the first path
93
    :param path2: The second path
94
    :param size1: The length of the second path
95
    :return: 0 if they are equal, -1 if path1 comes first, 1 if path2 comes
96
        first
97
    """
98
    cdef char *base1
99
    cdef char *base2
100
    cdef char *tip1
101
    cdef char *tip2
102
    cdef char *end1
103
    cdef char *end2
104
    cdef int cur_len1
105
    cdef int cur_len2
106
    cdef int cmp_len
107
    cdef int diff
108
109
    base1 = path1
110
    base2 = path2
111
    end1 = base1 + size1
112
    end2 = base2 + size2
113
114
    # Ensure that we are pointing to the final NULL terminator on both ends
115
    assert end1[0] == c'\x00'
116
    assert end2[0] == c'\x00'
117
118
    while base1 < end1 and base2 < end2:
119
        # Find the next path separator
120
        # (This is where you would like strchrnul)
121
        tip1 = strchr(base1, c'/')
122
        tip2 = strchr(base2, c'/')
123
124
        if tip1 == NULL:
125
            tip1 = end1
126
        if tip2 == NULL:
127
            tip2 = end2
128
129
        cur_len1 = tip1 - base1
130
        cur_len2 = tip2 - base2
131
        cmp_len = cur_len1
132
        if cur_len2 < cur_len1:
133
            cmp_len = cur_len2
134
135
        diff = strncmp(base1, base2, cmp_len)
136
        # print 'comparing "%s", "%s", %d = %d' % (base1, base2, cmp_len, diff)
137
        if diff != 0:
138
            return diff
139
        if cur_len1 < cur_len2:
140
            return -1
141
        elif cur_len1 > cur_len2:
142
            return 1
143
        base1 = tip1+1
144
        base2 = tip2+1
145
    # Do we still have uncompared characters?
146
    if base1 < end1:
147
        return 1
148
    if base2 < end2:
149
        return -1
150
    return 0
151
152
153
def cmp_dirblock_strings(path1, path2):
154
    """Compare to python strings in dirblock fashion."""
155
    return _cmp_dirblock_strings(PyString_AsString(path1),
156
                                 PyString_Size(path1),
157
                                 PyString_AsString(path2),
158
                                 PyString_Size(path2))
159
160
2474.1.5 by John Arbash Meinel
Implement explicit handling of the no-cache version, which is even faster.
161
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
2474.1.4 by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.
162
    """Return the index where to insert dirname into the dirblocks.
163
164
    The return value idx is such that all directories blocks in dirblock[:idx]
165
    have names < dirname, and all blocks in dirblock[idx:] have names >=
166
    dirname.
167
168
    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
169
    slice of a to be searched.
170
    """
171
    cdef int _lo
172
    cdef int _hi
173
    cdef int _mid
174
    cdef object dirname_split
175
    cdef object cur_split
176
177
    if hi is None:
178
        _hi = len(dirblocks)
179
    else:
180
        _hi = hi
181
182
    _lo = lo
2474.1.5 by John Arbash Meinel
Implement explicit handling of the no-cache version, which is even faster.
183
    if cache is None:
184
        return _bisect_dirblock_nocache(dirblocks, dirname, _lo, _hi)
185
2474.1.4 by John Arbash Meinel
Add benchmarks for dirstate.bisect_dirblocks, and implement bisect_dirblocks in pyrex.
186
    dirname_split = _split_from_path(cache, dirname)
187
    while _lo < _hi:
188
        _mid = (_lo+_hi)/2
189
        # Grab the dirname for the current dirblock
190
        cur = PyTuple_GetItem(PyList_GetItem(dirblocks, _mid), 0)
191
        cur_split = _split_from_path(cache, cur)
192
        if cur_split < dirname_split: _lo = _mid+1
193
        else: _hi = _mid
194
    return _lo
195
196
2474.1.1 by John Arbash Meinel
Create a Pyrex extension for reading the dirstate file.
197
def _read_dirblocks(state):
198
    """Read in the dirblocks for the given DirState object.
199
200
    This is tightly bound to the DirState internal representation. It should be
201
    thought of as a member function, which is only separated out so that we can
202
    re-write it in pyrex.
203
204
    :param state: A DirState object.
205
    :return: None
206
    """
207
    cdef int pos
208
    cdef int entry_size
209
    cdef int field_count
210
211
    state._state_file.seek(state._end_of_header)
212
    text = state._state_file.read()
213
    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
214
215
    fields = text.split('\0')
216
    # Remove the last blank entry
217
    trailing = fields.pop()
218
    assert trailing == ''
219
    # consider turning fields into a tuple.
220
221
    # skip the first field which is the trailing null from the header.
222
    cur = 1
223
    # Each line now has an extra '\n' field which is not used
224
    # so we just skip over it
225
    # entry size:
226
    #  3 fields for the key
227
    #  + number of fields per tree_data (5) * tree count
228
    #  + newline
229
    num_present_parents = state._num_present_parents()
230
    tree_count = 1 + num_present_parents
231
    entry_size = state._fields_per_entry()
232
    expected_field_count = entry_size * state._num_entries
233
    field_count = len(fields)
234
    # this checks our adjustment, and also catches file too short.
235
    assert field_count - cur == expected_field_count, \
236
        'field count incorrect %s != %s, entry_size=%s, '\
237
        'num_entries=%s fields=%r' % (
238
            field_count - cur, expected_field_count, entry_size,
239
            state._num_entries, fields)
240
241
    if num_present_parents == 1:
242
        # Bind external functions to local names
243
        _int = int
244
        # We access all fields in order, so we can just iterate over
245
        # them. Grab an straight iterator over the fields. (We use an
246
        # iterator because we don't want to do a lot of additions, nor
247
        # do we want to do a lot of slicing)
248
        next = iter(fields).next
249
        # Move the iterator to the current position
250
        for x in xrange(cur):
251
            next()
252
        # The two blocks here are deliberate: the root block and the
253
        # contents-of-root block.
254
        state._dirblocks = [('', []), ('', [])]
255
        current_block = state._dirblocks[0][1]
256
        current_dirname = ''
257
        append_entry = current_block.append
258
        for count in xrange(state._num_entries):
259
            dirname = next()
260
            name = next()
261
            file_id = next()
262
            if dirname != current_dirname:
263
                # new block - different dirname
264
                current_block = []
265
                current_dirname = dirname
266
                state._dirblocks.append((current_dirname, current_block))
267
                append_entry = current_block.append
268
            # we know current_dirname == dirname, so re-use it to avoid
269
            # creating new strings
270
            entry = ((current_dirname, name, file_id),
271
                     [(# Current Tree
272
                         next(),                # minikind
273
                         next(),                # fingerprint
274
                         _int(next()),          # size
275
                         next() == 'y',         # executable
276
                         next(),                # packed_stat or revision_id
277
                     ),
278
                     ( # Parent 1
279
                         next(),                # minikind
280
                         next(),                # fingerprint
281
                         _int(next()),          # size
282
                         next() == 'y',         # executable
283
                         next(),                # packed_stat or revision_id
284
                     ),
285
                     ])
286
            trailing = next()
287
            assert trailing == '\n'
288
            # append the entry to the current block
289
            append_entry(entry)
290
        state._split_root_dirblock_into_contents()
291
    else:
292
293
        fields_to_entry = state._get_fields_to_entry()
294
        entries = []
295
        entries_append = entries.append
296
        pos = cur
297
        entry_size = entry_size
298
        while pos < field_count:
299
            entries_append(fields_to_entry(fields[pos:pos+entry_size]))
300
            pos = pos + entry_size
301
        state._entries_to_current_state(entries)
302
    # To convert from format 2  => format 3
303
    # state._dirblocks = sorted(state._dirblocks,
304
    #                          key=lambda blk:blk[0].split('/'))
305
    # To convert from format 3 => format 2
306
    # state._dirblocks = sorted(state._dirblocks)
307
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED