~bzr-pqm/bzr/bzr.dev

3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
1
# Copyright (C) 2009 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
25
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
26
    int PyString_CheckExact(object)
27
    char * PyString_AS_STRING(object)
28
    Py_ssize_t PyString_GET_SIZE(object)
29
    object PyString_FromStringAndSize(char *, Py_ssize_t)
30
31
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
32
cdef extern from *:
33
    ctypedef unsigned long size_t
34
    void * malloc(size_t)
0.18.23 by John Arbash Meinel
Now we can add more lines without having to rebuild the whole hash
35
    void * realloc(void *, size_t)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
36
    void free(void *)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
37
    void memcpy(void *, void *, size_t)
38
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
39
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
41
    struct source_info:
42
        void *buf
43
        unsigned long size
44
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
45
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
46
        pass
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
47
    delta_index * create_delta_index(source_info *src, delta_index *old)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
    delta_index * create_delta_index_from_delta(source_info *delta,
49
                                                delta_index *old)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
50
    void free_delta_index(delta_index *index)
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
51
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
52
             void *buf, unsigned long bufsize,
53
             unsigned long *delta_size, unsigned long max_delta_size)
54
    unsigned long get_delta_hdr_size(unsigned char **datap,
55
                                     unsigned char *top)
56
    Py_ssize_t DELTA_SIZE_MIN
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
57
    void *patch_delta(void *src_buf, unsigned long src_size,
58
                      void *delta_buf, unsigned long delta_size,
59
                      unsigned long *dst_size)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
60
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
61
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
62
cdef void *safe_malloc(size_t count) except NULL:
63
    cdef void *result
64
    result = malloc(count)
65
    if result == NULL:
66
        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
67
    return result
68
69
70
cdef void *safe_realloc(void * old, size_t count) except NULL:
71
    cdef void *result
72
    result = realloc(old, count)
73
    if result == NULL:
74
        raise MemoryError('Failed to reallocate to %d bytes of memory'
75
                          % (count,))
76
    return result
77
78
79
cdef int safe_free(void **val) except -1:
80
    assert val != NULL
81
    if val[0] != NULL:
82
        free(val[0])
83
        val[0] = NULL
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
84
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
85
def make_delta_index(source):
86
    return DeltaIndex(source)
87
88
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
89
cdef class DeltaIndex:
90
0.23.40 by John Arbash Meinel
Add a comment why we aren't using the list type for _sources
91
    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
92
    # isn't performance critical
93
    # cdef readonly list _sources
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
94
    cdef readonly object _sources
0.23.42 by John Arbash Meinel
Change the code around again.
95
    cdef source_info *_source_infos
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
96
    cdef delta_index *_index
0.23.42 by John Arbash Meinel
Change the code around again.
97
    cdef readonly unsigned int _max_num_sources
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
98
    cdef public unsigned long _source_offset
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
99
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
100
    def __init__(self, source=None):
101
        self._sources = []
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
102
        self._index = NULL
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
103
        self._max_num_sources = 65000
0.23.42 by John Arbash Meinel
Change the code around again.
104
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
105
                                                        * self._max_num_sources)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
106
        self._source_offset = 0
107
108
        if source is not None:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
109
            self.add_source(source, 0)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
110
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
111
    def __repr__(self):
112
        return '%s(%d, %d)' % (self.__class__.__name__,
113
            len(self._sources), self._source_offset)
114
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
115
    def __dealloc__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
116
        if self._index != NULL:
117
            free_delta_index(self._index)
118
            self._index = NULL
0.23.42 by John Arbash Meinel
Change the code around again.
119
        safe_free(<void **>&self._source_infos)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
120
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
121
    def add_delta_source(self, delta, unadded_bytes):
122
        """Add a new delta to the source texts.
123
124
        :param delta: The text of the delta, this must be a byte string.
125
        :param unadded_bytes: Number of bytes that were added to the source
126
            that were not indexed.
127
        """
128
        cdef char *c_delta
129
        cdef Py_ssize_t c_delta_size
130
        cdef delta_index *index
131
        cdef unsigned int source_location
132
        cdef source_info *src
133
        cdef unsigned int num_indexes
134
135
        if not PyString_CheckExact(delta):
136
            raise TypeError('delta is not a str')
137
138
        source_location = len(self._sources)
139
        if source_location >= self._max_num_sources:
140
            self._expand_sources()
141
        self._sources.append(delta)
142
        c_delta = PyString_AS_STRING(delta)
143
        c_delta_size = PyString_GET_SIZE(delta)
144
        src = self._source_infos + source_location
145
        src.buf = c_delta
146
        src.size = c_delta_size
147
        src.agg_offset = self._source_offset + unadded_bytes
148
        index = create_delta_index_from_delta(src, self._index)
149
        self._source_offset = src.agg_offset + src.size
0.23.49 by John Arbash Meinel
When adding new entries to the delta index, use memcpy
150
        if index != NULL:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
151
            free_delta_index(self._index)
152
            self._index = index
153
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
154
    def add_source(self, source, unadded_bytes):
155
        """Add a new bit of source text to the delta indexes.
156
157
        :param source: The text in question, this must be a byte string
158
        :param unadded_bytes: Assume there are this many bytes that didn't get
159
            added between this source and the end of the previous source.
160
        """
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
161
        cdef char *c_source
162
        cdef Py_ssize_t c_source_size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
163
        cdef delta_index *index
0.23.42 by John Arbash Meinel
Change the code around again.
164
        cdef unsigned int source_location
165
        cdef source_info *src
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
166
        cdef unsigned int num_indexes
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
167
168
        if not PyString_CheckExact(source):
169
            raise TypeError('source is not a str')
170
0.23.42 by John Arbash Meinel
Change the code around again.
171
        source_location = len(self._sources)
172
        if source_location >= self._max_num_sources:
173
            self._expand_sources()
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
174
        self._sources.append(source)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
175
        c_source = PyString_AS_STRING(source)
176
        c_source_size = PyString_GET_SIZE(source)
0.23.42 by John Arbash Meinel
Change the code around again.
177
        src = self._source_infos + source_location
178
        src.buf = c_source
179
        src.size = c_source_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
180
0.23.42 by John Arbash Meinel
Change the code around again.
181
        src.agg_offset = self._source_offset + unadded_bytes
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
182
        index = create_delta_index(src, self._index)
0.23.42 by John Arbash Meinel
Change the code around again.
183
        self._source_offset = src.agg_offset + src.size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
184
        if index != NULL:
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
185
            free_delta_index(self._index)
186
            self._index = index
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
187
0.23.42 by John Arbash Meinel
Change the code around again.
188
    cdef _expand_sources(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
189
        raise RuntimeError('if we move self._source_infos, then we need to'
190
                           ' change all of the index pointers as well.')
0.23.42 by John Arbash Meinel
Change the code around again.
191
        self._max_num_sources = self._max_num_sources * 2
192
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
193
                                                sizeof(source_info)
194
                                                * self._max_num_sources)
195
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
196
    def make_delta(self, target_bytes, max_delta_size=0):
197
        """Create a delta from the current source to the target bytes."""
198
        cdef char *target
199
        cdef Py_ssize_t target_size
200
        cdef void * delta
201
        cdef unsigned long delta_size
202
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
203
        if self._index == NULL:
0.23.15 by John Arbash Meinel
Handle when self._index is NULL, mostly because the source text was the empty strig.
204
            return None
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
205
206
        if not PyString_CheckExact(target_bytes):
207
            raise TypeError('target is not a str')
208
209
        target = PyString_AS_STRING(target_bytes)
210
        target_size = PyString_GET_SIZE(target_bytes)
211
212
        # TODO: inline some of create_delta so we at least don't have to double
213
        #       malloc, and can instead use PyString_FromStringAndSize, to
214
        #       allocate the bytes into the final string
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
215
        delta = create_delta(self._index,
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
216
                             target, target_size,
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
217
                             &delta_size, max_delta_size)
218
        result = None
219
        if delta:
220
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
221
            free(delta)
222
        return result
223
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
224
225
def make_delta(source_bytes, target_bytes):
0.23.42 by John Arbash Meinel
Change the code around again.
226
    """Create a delta, this is a wrapper around DeltaIndex.make_delta."""
227
    di = DeltaIndex(source_bytes)
228
    return di.make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
229
230
231
def apply_delta(source_bytes, delta_bytes):
232
    """Apply a delta generated by make_delta to source_bytes."""
233
    cdef char *source
234
    cdef Py_ssize_t source_size
235
    cdef char *delta
236
    cdef Py_ssize_t delta_size
237
238
    if not PyString_CheckExact(source_bytes):
239
        raise TypeError('source is not a str')
240
    if not PyString_CheckExact(delta_bytes):
241
        raise TypeError('delta is not a str')
242
    source = PyString_AS_STRING(source_bytes)
243
    source_size = PyString_GET_SIZE(source_bytes)
244
    delta = PyString_AS_STRING(delta_bytes)
245
    delta_size = PyString_GET_SIZE(delta_bytes)
246
    # Code taken from patch-delta.c, only brought here to give better error
247
    # handling, and to avoid double allocating memory
248
    if (delta_size < DELTA_SIZE_MIN):
249
        # XXX: Invalid delta block
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
250
        raise RuntimeError('delta_size %d smaller than min delta size %d'
251
                           % (delta_size, DELTA_SIZE_MIN))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
252
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
253
    return _apply_delta(source, source_size, delta, delta_size)
254
255
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
256
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
257
    unsigned char cmd, unsigned int *offset, unsigned int *length):
258
    """Decode a copy instruction from the next few bytes.
259
260
    A copy instruction is a variable number of bytes, so we will parse the
261
    bytes we care about, and return the new position, as well as the offset and
262
    length referred to in the bytes.
263
264
    :param bytes: Pointer to the start of bytes after cmd
265
    :param cmd: The command code
266
    :return: Pointer to the bytes just after the last decode byte
267
    """
268
    cdef unsigned int off, size, count
269
    off = 0
270
    size = 0
271
    count = 0
272
    if (cmd & 0x01):
273
        off = bytes[count]
274
        count = count + 1
275
    if (cmd & 0x02):
276
        off = off | (bytes[count] << 8)
277
        count = count + 1
278
    if (cmd & 0x04):
279
        off = off | (bytes[count] << 16)
280
        count = count + 1
281
    if (cmd & 0x08):
282
        off = off | (bytes[count] << 24)
283
        count = count + 1
284
    if (cmd & 0x10):
285
        size = bytes[count]
286
        count = count + 1
287
    if (cmd & 0x20):
288
        size = size | (bytes[count] << 8)
289
        count = count + 1
290
    if (cmd & 0x40):
291
        size = size | (bytes[count] << 16)
292
        count = count + 1
293
    if (size == 0):
294
        size = 0x10000
295
    offset[0] = off
296
    length[0] = size
297
    return bytes + count
298
299
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
300
cdef object _apply_delta(char *source, Py_ssize_t source_size,
301
                         char *delta, Py_ssize_t delta_size):
302
    """common functionality between apply_delta and apply_delta_to_source."""
303
    cdef unsigned char *data, *top
304
    cdef unsigned char *dst_buf, *out, cmd
305
    cdef Py_ssize_t size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
306
    cdef unsigned int cp_off, cp_size
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
307
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
308
    data = <unsigned char *>delta
309
    top = data + delta_size
310
311
    # now the result size
312
    size = get_delta_hdr_size(&data, top)
313
    result = PyString_FromStringAndSize(NULL, size)
314
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
315
316
    out = dst_buf
317
    while (data < top):
318
        cmd = data[0]
319
        data = data + 1
320
        if (cmd & 0x80):
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
321
            # Copy instruction
322
            data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
323
            if (cp_off + cp_size < cp_size or
324
                cp_off + cp_size > source_size or
325
                cp_size > size):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
326
                raise RuntimeError('Something wrong with:'
327
                    ' cp_off = %s, cp_size = %s'
328
                    ' source_size = %s, size = %s'
329
                    % (cp_off, cp_size, source_size, size))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
330
            memcpy(out, source + cp_off, cp_size)
331
            out = out + cp_size
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
332
            size = size - cp_size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
333
        else:
334
            # Insert instruction
335
            if cmd == 0:
336
                # cmd == 0 is reserved for future encoding
337
                # extensions. In the mean time we must fail when
338
                # encountering them (might be data corruption).
339
                raise RuntimeError('Got delta opcode: 0, not supported')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
340
            if (cmd > size):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
341
                raise RuntimeError('Insert instruction longer than remaining'
342
                    ' bytes: %d > %d' % (cmd, size))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
343
            memcpy(out, data, cmd)
344
            out = out + cmd
345
            data = data + cmd
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
346
            size = size - cmd
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
347
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
348
    # sanity check
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
349
    if (data != top or size != 0):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
350
        raise RuntimeError('Did not extract the number of bytes we expected'
351
            ' we were left with %d bytes in "size", and top - data = %d'
352
            % (size, <int>(top - data)))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
353
        return None
354
355
    # *dst_size = out - dst_buf;
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
356
    if (out - dst_buf) != PyString_GET_SIZE(result):
357
        raise RuntimeError('Number of bytes extracted did not match the'
358
            ' size encoded in the delta header.')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
359
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
360
361
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
362
def apply_delta_to_source(source, delta_start, delta_end):
363
    """Extract a delta from source bytes, and apply it."""
364
    cdef char *c_source
365
    cdef Py_ssize_t c_source_size
366
    cdef char *c_delta
367
    cdef Py_ssize_t c_delta_size
368
    cdef Py_ssize_t c_delta_start, c_delta_end
369
370
    if not PyString_CheckExact(source):
371
        raise TypeError('source is not a str')
372
    c_source_size = PyString_GET_SIZE(source)
373
    c_delta_start = delta_start
374
    c_delta_end = delta_end
375
    if c_delta_start >= c_source_size:
376
        raise ValueError('delta starts after source')
377
    if c_delta_end > c_source_size:
378
        raise ValueError('delta ends after source')
379
    if c_delta_start >= c_delta_end:
380
        raise ValueError('delta starts after it ends')
381
382
    c_delta_size = c_delta_end - c_delta_start
383
    c_source = PyString_AS_STRING(source)
384
    c_delta = c_source + c_delta_start
385
    # We don't use source_size, because we know the delta should not refer to
386
    # any bytes after it starts
387
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
388
389
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
390
def encode_base128_int(val):
391
    """Convert an integer into a 7-bit lsb encoding."""
392
    cdef unsigned int c_val
393
    cdef Py_ssize_t count
394
    cdef unsigned int num_bytes
395
    cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes
396
397
    c_val = val
398
    count = 0
399
    while c_val >= 0x80 and count < 8:
400
        c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
401
        c_val = c_val >> 7
402
        count = count + 1
403
    if count >= 8 or c_val >= 0x80:
404
        raise ValueError('encode_base128_int overflowed the buffer')
405
    c_bytes[count] = <unsigned char>(c_val & 0xFF)
406
    count = count + 1
407
    return PyString_FromStringAndSize(<char *>c_bytes, count)
408
409
410
def decode_base128_int(bytes):
411
    """Decode an integer from a 7-bit lsb encoding."""
412
    cdef int offset
413
    cdef int val
414
    cdef unsigned int uval
415
    cdef int shift
416
    cdef Py_ssize_t num_low_bytes
417
    cdef unsigned char *c_bytes
418
419
    offset = 0
420
    val = 0
421
    shift = 0
422
    if not PyString_CheckExact(bytes):
423
        raise TypeError('bytes is not a string')
424
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
425
    # We take off 1, because we have to be able to decode the non-expanded byte
426
    num_low_bytes = PyString_GET_SIZE(bytes) - 1
427
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
428
        val = val | ((c_bytes[offset] & 0x7F) << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
429
        shift = shift + 7
430
        offset = offset + 1
431
    if c_bytes[offset] & 0x80:
432
        raise ValueError('Data not properly formatted, we ran out of'
433
                         ' bytes before 0x80 stopped being set.')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
434
    val = val | (c_bytes[offset] << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
435
    offset = offset + 1
436
    if val < 0:
437
        uval = <unsigned int> val
438
        return uval, offset
439
    return val, offset
440
441