~bzr-pqm/bzr/bzr.dev

4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
4241.6.4 by Robert Collins
Really build on old pyrex.
18
#python2.4 support
19
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
20
    pass
4241.6.4 by Robert Collins
Really build on old pyrex.
21
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
22
cdef extern from *:
23
    ctypedef unsigned int size_t
24
    int memcmp(void *, void*, size_t)
25
    void memcpy(void *, void*, size_t)
26
    void *memchr(void *s, int c, size_t len)
27
    long strtol(char *, char **, int)
28
    void sprintf(char *, char *, ...)
29
30
cdef extern from "Python.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
31
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
32
    struct _PyObject:
33
        pass
34
    ctypedef _PyObject PyObject
35
    int PyTuple_CheckExact(object p)
36
    Py_ssize_t PyTuple_GET_SIZE(object t)
37
    int PyString_CheckExact(object)
38
    char *PyString_AS_STRING(object s)
39
    Py_ssize_t PyString_GET_SIZE(object)
40
41
    int PyDict_SetItem(object d, object k, object v) except -1
42
43
    object PyTuple_New(Py_ssize_t count)
44
    void PyTuple_SET_ITEM(object t, Py_ssize_t offset, object)
45
46
    void Py_INCREF(object)
47
48
    PyObject * PyTuple_GET_ITEM_ptr "PyTuple_GET_ITEM" (object t,
49
                                                        Py_ssize_t offset)
50
    int PyString_CheckExact_ptr "PyString_CheckExact" (PyObject *p)
51
    Py_ssize_t PyString_GET_SIZE_ptr "PyString_GET_SIZE" (PyObject *s)
52
    char *PyString_AS_STRING_ptr "PyString_AS_STRING" (PyObject *s)
53
    object PyString_FromStringAndSize(char*, Py_ssize_t)
54
55
cdef extern from "zlib.h":
56
    ctypedef unsigned long uLong
57
    ctypedef unsigned int uInt
58
    ctypedef unsigned char Bytef
59
60
    uLong crc32(uLong crc, Bytef *buf, uInt len)
61
4679.9.1 by John Arbash Meinel
Merge in the static-tuple-no-use branch, and bring back the chk_map use.
62
# It seems we need to import the definitions so that the pyrex compiler has
63
# local names to access them.
64
from _static_tuple_c cimport StaticTuple,\
65
    import_static_tuple_c, StaticTuple_New, \
66
    StaticTuple_Intern, StaticTuple_SET_ITEM, StaticTuple_CheckExact
67
68
69
# Initialise the StaticTuple C API before any StaticTuple_* helper is called.
import_static_tuple_c()

# Module-level slots for objects that live in bzrlib.chk_map.  They start out
# as None; the _deserialise_* functions below import bzrlib.chk_map at call
# time and look the real objects up there (presumably to avoid importing
# chk_map while this extension is being loaded -- TODO confirm).
cdef object _LeafNode, _InternalNode, _unknown
_LeafNode = None
_InternalNode = None
_unknown = None
78
4459.2.1 by Vincent Ladeuil
Use a consistent scheme for naming pyrex source files.
79
# We shouldn't just copy this from _dirstate_helpers_pyx
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
80
cdef void* _my_memrchr(void *s, int c, size_t n):
    # Portable replacement for memrchr(), which seems to be a GNU extension.
    # Scans the n bytes starting at s from the last byte towards the first
    # and returns a pointer to the last byte equal to c, or NULL if there is
    # no such byte (including when n is 0).
    cdef char *base
    cdef size_t remaining

    base = <char*>s
    remaining = n
    while remaining > 0:
        remaining = remaining - 1
        if base[remaining] == c:
            return <void*>(base + remaining)
    return NULL
92
93
94
def _search_key_16(key):
    """See chk_map._search_key_16.

    Every element ("bit") of the key is hashed with crc32 and written out as
    8 upper-case hexadecimal digits; consecutive bits are separated by a
    single NUL byte.

    :param key: A StaticTuple whose elements are all plain strings.
    :return: A plain string of 9 * len(key) - 1 bytes, or '' for an empty
        key.
    :raises TypeError: If key is not a StaticTuple, or one of its elements
        is not a plain string.
    """
    cdef Py_ssize_t num_bits
    cdef Py_ssize_t i
    cdef Py_ssize_t num_out_bytes
    cdef Bytef *c_bit
    cdef uLong c_len
    cdef uInt crc_val
    cdef char *c_out

    if not StaticTuple_CheckExact(key):
        raise TypeError('key %r is not a StaticTuple' % (key,))
    num_bits = len(key)
    if num_bits == 0:
        # The size computed below would be -1, which is not a valid length
        # to allocate; an empty key simply has an empty search key.
        return ''
    # 8 hex digits per crc32, and another 1 byte between bits
    num_out_bytes = (9 * num_bits) - 1
    out = PyString_FromStringAndSize(NULL, num_out_bytes)
    c_out = PyString_AS_STRING(out)
    for i from 0 <= i < num_bits:
        if i > 0:
            c_out[0] = c'\x00'
            c_out = c_out + 1
        # XXX: This could be updated to use PySequence_GetItem, since both
        #      PyTuple and StaticTuple support that api
        bit = key[i]
        if not PyString_CheckExact(bit):
            raise TypeError('Bit %d of %r is not a string' % (i, key))
        c_bit = <Bytef *>PyString_AS_STRING(bit)
        c_len = PyString_GET_SIZE(bit)
        crc_val = crc32(0, c_bit, c_len)
        # Hex(val) order.  sprintf also writes a terminating NUL after the 8
        # digits: it lands either on the separator slot, which the next
        # iteration writes again, or on the byte just past the end of the
        # string data (the string's own terminator).
        sprintf(c_out, '%08X', crc_val)
        c_out = c_out + 8
    return out
132
133
134
def _search_key_255(key):
    """See chk_map._search_key_255.

    Every element ("bit") of the key is hashed with crc32 and written out as
    4 raw bytes, most significant byte first; consecutive bits are separated
    by a single NUL byte.  Any newline byte produced by a hash is replaced
    with '_' so the result never contains a newline.

    :param key: A StaticTuple whose elements are all plain strings.
    :return: A plain string of 5 * len(key) - 1 bytes, or '' for an empty
        key.
    :raises TypeError: If key is not a StaticTuple, or one of its elements
        is not a plain string.
    """
    cdef Py_ssize_t num_bits
    cdef Py_ssize_t i, j
    cdef Py_ssize_t num_out_bytes
    cdef Bytef *c_bit
    cdef uLong c_len
    cdef uInt crc_val
    cdef char *c_out

    if not StaticTuple_CheckExact(key):
        raise TypeError('key %r is not a StaticTuple' % (key,))
    num_bits = len(key)
    if num_bits == 0:
        # The size computed below would be -1, which is not a valid length
        # to allocate; an empty key simply has an empty search key.
        return ''
    # 4 bytes per crc32, and another 1 byte between bits
    num_out_bytes = (5 * num_bits) - 1
    out = PyString_FromStringAndSize(NULL, num_out_bytes)
    c_out = PyString_AS_STRING(out)
    for i from 0 <= i < num_bits:
        if i > 0:
            c_out[0] = c'\x00'
            c_out = c_out + 1
        bit = key[i]
        if not PyString_CheckExact(bit):
            raise TypeError('Bit %d of %r is not a string: %r' % (i, key,
            bit))
        c_bit = <Bytef *>PyString_AS_STRING(bit)
        c_len = PyString_GET_SIZE(bit)
        crc_val = crc32(0, c_bit, c_len)
        # MSB order
        c_out[0] = (crc_val >> 24) & 0xFF
        c_out[1] = (crc_val >> 16) & 0xFF
        c_out[2] = (crc_val >> 8) & 0xFF
        c_out[3] = (crc_val >> 0) & 0xFF
        # Newlines are not allowed in the output, so mask them out.
        for j from 0 <= j < 4:
            if c_out[j] == c'\n':
                c_out[j] = c'_'
        c_out = c_out + 4
    return out
174
175
176
cdef int _get_int_from_line(char **cur, char *end, char *message) except -1:
    """Read a non-negative integer from the data stream.

    The integer must occupy the whole line: it has to start at cur[0] and
    be immediately followed by the newline that ends the line.

    :param cur: The start of the data, this will be moved to after the
        trailing newline when done.
    :param end: Do not parse any data past this byte.
    :param message: Name of the field being read; only used to build the
        error messages.
    :return: The integer stored in those bytes
    :raises ValueError: If there is no newline before end, or the line is
        empty, is not a decimal integer, or holds a negative value.
    """
    cdef int value
    cdef char *next_line, *next

    next_line = <char *>memchr(cur[0], c'\n', end - cur[0])
    if next_line == NULL:
        raise ValueError("Missing %s line\n" % message)
    if next_line == cur[0]:
        # An empty line.  strtol would skip the newline as whitespace and,
        # when nothing numeric follows, report "no conversion" by leaving
        # next == cur[0], which the check below would accept as a 0.
        raise ValueError("%s line not a proper int\n" % message)

    value = strtol(cur[0], &next, 10)
    if next != next_line:
        raise ValueError("%s line not a proper int\n" % message)
    if value < 0:
        # Negative values are never valid here, and -1 in particular is the
        # value this function uses to signal an exception to its caller.
        raise ValueError("%s line not a proper int\n" % message)
    cur[0] = next_line + 1
    return value
196
197
198
def _deserialise_leaf_node(bytes, key, search_key_func=None):
    """Deserialise bytes, with key key, into a LeafNode.

    The expected layout is: the marker line "chkleaf:", then one line each
    for maximum_size, width and length, then the common prefix line, then
    for every item a line "<rest of key>\\0<number of value lines>" followed
    by that many value lines.

    :param bytes: The bytes of the node.
    :param key: The key that the serialised node has.
    :param search_key_func: Passed through to the LeafNode constructor.
    :return: A chk_map.LeafNode populated from bytes.
    :raises TypeError: If bytes is not a plain string.
    :raises ValueError: If bytes is not a well-formed serialised leaf node.
    :raises AssertionError: If an entry does not have exactly width
        elements, or the node's computed size disagrees with len(bytes).
    """
    cdef char *c_bytes, *cur, *end
    cdef char *next_line
    cdef Py_ssize_t c_bytes_len, prefix_length, items_length
    cdef int maximum_size, width, length, i, prefix_tail_len
    cdef int num_value_lines, num_prefix_bits
    cdef char *prefix, *value_start, *prefix_tail
    cdef char *next_null, *last_null, *line_start
    cdef char *c_entry, *entry_start
    cdef StaticTuple entry_bits
    # Without this declaration the assignments below would bind function
    # locals, and the module-level cache would never be filled in.
    global _LeafNode, _InternalNode, _unknown

    if _LeafNode is None:
        from bzrlib import chk_map
        _LeafNode = chk_map.LeafNode
        _InternalNode = chk_map.InternalNode
        _unknown = chk_map._unknown

    result = _LeafNode(search_key_func=search_key_func)
    # Splitlines can split on '\r' so don't use it, split('\n') adds an
    # extra '' if the bytes ends in a final newline.
    if not PyString_CheckExact(bytes):
        raise TypeError('bytes must be a plain string not %s' % (type(bytes),))

    c_bytes = PyString_AS_STRING(bytes)
    c_bytes_len = PyString_GET_SIZE(bytes)

    if c_bytes_len < 9 or memcmp(c_bytes, "chkleaf:\n", 9) != 0:
        raise ValueError("not a serialised leaf node: %r" % bytes)
    if c_bytes[c_bytes_len - 1] != c'\n':
        raise ValueError("bytes does not end in a newline")

    end = c_bytes + c_bytes_len
    cur = c_bytes + 9
    maximum_size = _get_int_from_line(&cur, end, "maximum_size")
    width = _get_int_from_line(&cur, end, "width")
    length = _get_int_from_line(&cur, end, "length")

    next_line = <char *>memchr(cur, c'\n', end - cur)
    if next_line == NULL:
        raise ValueError('Missing the prefix line\n')
    prefix = cur
    prefix_length = next_line - cur
    cur = next_line + 1

    # Split the common prefix on NUL: every complete element goes into
    # prefix_bits, what follows the last NUL (prefix_tail) is the start of
    # the next element and gets joined with the head of each item line.
    prefix_bits = []
    prefix_tail = prefix
    num_prefix_bits = 0
    next_null = <char *>memchr(prefix, c'\0', prefix_length)
    while next_null != NULL:
        num_prefix_bits = num_prefix_bits + 1
        prefix_bits.append(
            PyString_FromStringAndSize(prefix_tail, next_null - prefix_tail))
        prefix_tail = next_null + 1
        next_null = <char *>memchr(prefix_tail, c'\0', next_line - prefix_tail)
    prefix_tail_len = next_line - prefix_tail

    if num_prefix_bits >= width:
        raise ValueError('Prefix has too many nulls versus width')

    items_length = end - cur
    items = {}
    while cur < end:
        line_start = cur
        next_line = <char *>memchr(cur, c'\n', end - cur)
        if next_line == NULL:
            raise ValueError('null line\n')
        # The last NUL on the line separates the key from the count of
        # value lines.
        last_null = <char *>_my_memrchr(cur, c'\0', next_line - cur)
        if last_null == NULL:
            raise ValueError('fail to find the num value lines null')
        next_null = last_null + 1 # move past NULL
        num_value_lines = _get_int_from_line(&next_null, next_line + 1,
                                             "num value lines")
        cur = next_line + 1
        value_start = cur
        # Walk num_value_lines forward
        for i from 0 <= i < num_value_lines:
            next_line = <char *>memchr(cur, c'\n', end - cur)
            if next_line == NULL:
                raise ValueError('missing trailing newline')
            cur = next_line + 1
        entry_bits = StaticTuple_New(width)
        for i from 0 <= i < num_prefix_bits:
            # TODO: Use PyList_GetItem, or turn prefix_bits into a
            #       tuple/StaticTuple
            entry = prefix_bits[i]
            # SET_ITEM 'steals' a reference
            Py_INCREF(entry)
            StaticTuple_SET_ITEM(entry_bits, i, entry)
        value = PyString_FromStringAndSize(value_start, next_line - value_start)
        # The next entry bit needs the 'tail' from the prefix, and first part
        # of the line
        entry_start = line_start
        next_null = <char *>memchr(entry_start, c'\0',
                                   last_null - entry_start + 1)
        if next_null == NULL:
            raise ValueError('bad no null, bad')
        entry = PyString_FromStringAndSize(NULL,
                    prefix_tail_len + next_null - line_start)
        c_entry = PyString_AS_STRING(entry)
        if prefix_tail_len > 0:
            memcpy(c_entry, prefix_tail, prefix_tail_len)
        if next_null - line_start > 0:
            memcpy(c_entry + prefix_tail_len, line_start, next_null - line_start)
        Py_INCREF(entry)
        # num_prefix_bits < width was checked above, so this slot exists.
        i = num_prefix_bits
        StaticTuple_SET_ITEM(entry_bits, i, entry)
        while next_null != last_null: # We have remaining bits
            i = i + 1
            # Valid slots are 0 .. width - 1; refuse before writing past the
            # end of entry_bits.
            if i >= width:
                raise ValueError("Too many bits for entry")
            entry_start = next_null + 1
            next_null = <char *>memchr(entry_start, c'\0',
                                       last_null - entry_start + 1)
            if next_null == NULL:
                raise ValueError('bad no null')
            entry = PyString_FromStringAndSize(entry_start,
                                               next_null - entry_start)
            Py_INCREF(entry)
            StaticTuple_SET_ITEM(entry_bits, i, entry)
        # i is the index of the last slot that was filled, so i + 1 slots
        # are set.  entry_bits was created with width slots, so its len()
        # alone presumably cannot reveal a line with too few elements.
        if i + 1 != width or len(entry_bits) != width:
            raise AssertionError(
                'Incorrect number of elements (%d vs %d)'
                % (i + 1, width))
        entry_bits = StaticTuple_Intern(entry_bits)
        PyDict_SetItem(items, entry_bits, value)
    if len(items) != length:
        raise ValueError("item count (%d) mismatch for key %s,"
                         " bytes %r" % (length, entry_bits, bytes))
    result._items = items
    result._len = length
    result._maximum_size = maximum_size
    result._key = key
    result._key_width = width
    result._raw_size = items_length + length * prefix_length
    if length == 0:
        result._search_prefix = None
        result._common_serialised_prefix = None
    else:
        result._search_prefix = _unknown
        result._common_serialised_prefix = PyString_FromStringAndSize(prefix,
                                                prefix_length)
    if c_bytes_len != result._current_size():
        raise AssertionError('_current_size computed incorrectly %d != %d'
            % (c_bytes_len, result._current_size()))
    return result
348
349
350
def _deserialise_internal_node(bytes, key, search_key_func=None):
    """Deserialise bytes, with key key, into an InternalNode.

    The expected layout is: the marker line "chknode:", then one line each
    for maximum_size, width and length, then the common prefix line, then
    one line "<rest of item prefix>\\0<child key>" per child.

    :param bytes: The bytes of the node.
    :param key: The key that the serialised node has; must be a StaticTuple.
    :param search_key_func: Passed through to the InternalNode constructor.
    :return: A chk_map.InternalNode whose _items maps each full item prefix
        to the interned 1-element StaticTuple holding the child key.
    :raises TypeError: If key is not a StaticTuple or bytes is not a plain
        string.
    :raises ValueError: If bytes is not a well-formed serialised internal
        node.
    :raises AssertionError: If the node lists no children.
    """
    cdef char *c_bytes, *cur, *end
    cdef char *next_line
    cdef Py_ssize_t c_bytes_len, prefix_length
    cdef int maximum_size, width, length
    cdef char *prefix, *next_null, *c_item_prefix
    # Without this declaration the assignments below would bind function
    # locals, and the module-level cache would never be filled in.
    global _LeafNode, _InternalNode, _unknown

    if _InternalNode is None:
        from bzrlib import chk_map
        _LeafNode = chk_map.LeafNode
        _InternalNode = chk_map.InternalNode
        _unknown = chk_map._unknown
    result = _InternalNode(search_key_func=search_key_func)

    if not StaticTuple_CheckExact(key):
        raise TypeError('key %r is not a StaticTuple' % (key,))
    if not PyString_CheckExact(bytes):
        raise TypeError('bytes must be a plain string not %s' % (type(bytes),))

    c_bytes = PyString_AS_STRING(bytes)
    c_bytes_len = PyString_GET_SIZE(bytes)

    if c_bytes_len < 9 or memcmp(c_bytes, "chknode:\n", 9) != 0:
        raise ValueError("not a serialised internal node: %r" % bytes)
    if c_bytes[c_bytes_len - 1] != c'\n':
        raise ValueError("bytes does not end in a newline")

    items = {}
    cur = c_bytes + 9
    end = c_bytes + c_bytes_len
    maximum_size = _get_int_from_line(&cur, end, "maximum_size")
    width = _get_int_from_line(&cur, end, "width")
    length = _get_int_from_line(&cur, end, "length")

    next_line = <char *>memchr(cur, c'\n', end - cur)
    if next_line == NULL:
        raise ValueError('Missing the prefix line\n')
    prefix = cur
    prefix_length = next_line - cur
    cur = next_line + 1

    while cur < end:
        # Find the null separator
        next_line = <char *>memchr(cur, c'\n', end - cur)
        if next_line == NULL:
            raise ValueError('missing trailing newline')
        next_null = <char *>_my_memrchr(cur, c'\0', next_line - cur)
        if next_null == NULL:
            raise ValueError('bad no null')
        # The full item prefix is the common prefix followed by whatever
        # precedes the NUL on this line.
        item_prefix = PyString_FromStringAndSize(NULL,
            prefix_length + next_null - cur)
        c_item_prefix = PyString_AS_STRING(item_prefix)
        if prefix_length:
            memcpy(c_item_prefix, prefix, prefix_length)
        memcpy(c_item_prefix + prefix_length, cur, next_null - cur)
        # Everything between the NUL and the newline is the child's key.
        flat_key = PyString_FromStringAndSize(next_null + 1,
                                              next_line - next_null - 1)
        flat_key = StaticTuple(flat_key).intern()
        PyDict_SetItem(items, item_prefix, flat_key)
        cur = next_line + 1
    # Raise explicitly rather than with an assert statement, so the check
    # survives builds that compile assertions out; item_prefix is only
    # defined once at least one item line has been read.
    if len(items) == 0:
        raise AssertionError("internal node has no items")
    result._items = items
    result._len = length
    result._maximum_size = maximum_size
    result._key = key
    result._key_width = width
    # XXX: InternalNodes don't really care about their size, and this will
    #      change if we add prefix compression
    result._raw_size = None # len(bytes)
    result._node_width = len(item_prefix)
    result._search_prefix = PyString_FromStringAndSize(prefix, prefix_length)
    return result