~bzr-pqm/bzr/bzr.dev

3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
1
# Copyright (C) 2009 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
25
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
26
    int PyString_CheckExact(object)
27
    char * PyString_AS_STRING(object)
28
    Py_ssize_t PyString_GET_SIZE(object)
29
    object PyString_FromStringAndSize(char *, Py_ssize_t)
30
31
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
32
cdef extern from *:
33
    ctypedef unsigned long size_t
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
34
    void * malloc(size_t) nogil
35
    void * realloc(void *, size_t) nogil
36
    void free(void *) nogil
37
    void memcpy(void *, void *, size_t) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
38
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
39
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
41
    struct source_info:
42
        void *buf
43
        unsigned long size
44
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
45
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
46
        pass
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
47
    delta_index * create_delta_index(source_info *src, delta_index *old) nogil
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
    delta_index * create_delta_index_from_delta(source_info *delta,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
49
                                                delta_index *old) nogil
50
    void free_delta_index(delta_index *index) nogil
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
51
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
52
             void *buf, unsigned long bufsize,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
53
             unsigned long *delta_size, unsigned long max_delta_size) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
54
    unsigned long get_delta_hdr_size(unsigned char **datap,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
55
                                     unsigned char *top) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
56
    Py_ssize_t DELTA_SIZE_MIN
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
57
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
58
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
59
cdef void *safe_malloc(size_t count) except NULL:
    """Allocate ``count`` bytes with malloc(), raising instead of returning NULL.

    :param count: Number of bytes to request from malloc().
    :return: The pointer malloc() handed back; never NULL.
    :raise MemoryError: if malloc() returned NULL.
    """
    cdef void *block
    block = malloc(count)
    if block != NULL:
        return block
    raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
65
66
67
cdef void *safe_realloc(void * old, size_t count) except NULL:
    """Resize ``old`` to ``count`` bytes with realloc(), raising on failure.

    :param old: The existing allocation handed to realloc().
    :param count: The requested new size in bytes.
    :return: The pointer realloc() handed back; never NULL.
    :raise MemoryError: if realloc() returned NULL.
    """
    cdef void *block
    block = realloc(old, count)
    if block != NULL:
        return block
    raise MemoryError('Failed to reallocate to %d bytes of memory'
                      % (count,))
74
75
76
cdef int safe_free(void **val) except -1:
    """Free the allocation that ``val`` refers to and reset it to NULL.

    :param val: Address of the pointer to release. Must not be NULL itself,
        but the pointer it holds may be NULL, in which case nothing is freed.
    :return: 0 on success (-1 is reserved for signalling an exception).
    """
    assert val != NULL
    if val[0] == NULL:
        # Nothing is currently allocated, so there is nothing to release.
        return 0
    free(val[0])
    # Clear the caller's pointer so a second call is a harmless no-op.
    val[0] = NULL
    return 0
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
81
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
82
def make_delta_index(source):
    """Return a new DeltaIndex that has ``source`` as its first source text.

    This is a plain function wrapper around the DeltaIndex constructor.

    :param source: Passed straight through to ``DeltaIndex(source)``.
    :return: The freshly constructed DeltaIndex.
    """
    delta_index_obj = DeltaIndex(source)
    return delta_index_obj
84
85
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
86
cdef class DeltaIndex:
    """Python wrapper around a C ``delta_index`` built from source texts.

    Source texts (and previously generated deltas) are appended with
    add_source() / add_delta_source(); make_delta() then produces a delta of
    a target text against everything added so far.

    The Python strings are kept alive in ``_sources`` because the
    ``source_info`` records store raw pointers into their buffers.
    """

    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
    # isn't performance critical
    # cdef readonly list _sources
    cdef readonly object _sources
    # One source_info record per entry in _sources, in the same order.
    cdef source_info *_source_infos
    # The combined index; NULL until it has been created (creation of the
    # index for the very first source is deferred, see add_source).
    cdef delta_index *_index
    # Number of source_info records that were allocated.
    cdef readonly unsigned int _max_num_sources
    # agg_offset + size of the most recently added source.
    cdef public unsigned long _source_offset

    def __init__(self, source=None):
        """Create the index, optionally seeding it with a first source.

        :param source: If not None, added via ``add_source(source, 0)``.
        :raise MemoryError: if the source_info array cannot be allocated.
        """
        self._sources = []
        self._index = NULL
        self._max_num_sources = 65000
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
                                                        * self._max_num_sources)
        self._source_offset = 0

        if source is not None:
            self.add_source(source, 0)

    def __repr__(self):
        return '%s(%d, %d)' % (self.__class__.__name__,
            len(self._sources), self._source_offset)

    def __dealloc__(self):
        if self._index != NULL:
            free_delta_index(self._index)
            self._index = NULL
        safe_free(<void **>&self._source_infos)

    def _has_index(self):
        """Return True if the C index has been created."""
        return (self._index != NULL)

    def add_delta_source(self, delta, unadded_bytes):
        """Add a new delta to the source texts.

        :param delta: The text of the delta, this must be a byte string.
        :param unadded_bytes: Number of bytes that were added to the source
            that were not indexed.
        :raise TypeError: if delta is not exactly a str.
        """
        cdef char *c_delta
        cdef Py_ssize_t c_delta_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(delta):
            raise TypeError('delta is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        if source_location != 0 and self._index == NULL:
            # We were lazy about populating the index, create it now. Without
            # this the delta would be indexed against a NULL old index and
            # the deferred first index could never be built afterwards.
            self._populate_first_index()
        self._sources.append(delta)
        c_delta = PyString_AS_STRING(delta)
        c_delta_size = PyString_GET_SIZE(delta)
        src = self._source_infos + source_location
        src.buf = c_delta
        src.size = c_delta_size
        src.agg_offset = self._source_offset + unadded_bytes
        with nogil:
            index = create_delta_index_from_delta(src, self._index)
        self._source_offset = src.agg_offset + src.size
        if index != NULL:
            free_delta_index(self._index)
            self._index = index

    def add_source(self, source, unadded_bytes):
        """Add a new bit of source text to the delta indexes.

        :param source: The text in question, this must be a byte string
        :param unadded_bytes: Assume there are this many bytes that didn't get
            added between this source and the end of the previous source.
        :raise TypeError: if source is not exactly a str.
        """
        cdef char *c_source
        cdef Py_ssize_t c_source_size
        cdef delta_index *index
        cdef unsigned int source_location
        cdef source_info *src

        if not PyString_CheckExact(source):
            raise TypeError('source is not a str')

        source_location = len(self._sources)
        if source_location >= self._max_num_sources:
            self._expand_sources()
        if source_location != 0 and self._index == NULL:
            # We were lazy about populating the index, create it now
            self._populate_first_index()
        self._sources.append(source)
        c_source = PyString_AS_STRING(source)
        c_source_size = PyString_GET_SIZE(source)
        src = self._source_infos + source_location
        src.buf = c_source
        src.size = c_source_size

        src.agg_offset = self._source_offset + unadded_bytes
        self._source_offset = src.agg_offset + src.size
        # We delay creating the index on the first insert
        if source_location != 0:
            with nogil:
                index = create_delta_index(src, self._index)
            if index != NULL:
                free_delta_index(self._index)
                self._index = index

    cdef _populate_first_index(self):
        """Build the index for the first source, whose indexing was deferred.

        :raise AssertionError: if there is not exactly one source, if an
            index already exists, or if no index could be created.
        """
        if len(self._sources) != 1 or self._index != NULL:
            raise AssertionError('_populate_first_index should only be'
                ' called when we have a single source and no index yet')

        # We know that self._index is already NULL, so whatever
        # create_delta_index returns is fine
        with nogil:
            self._index = create_delta_index(&self._source_infos[0], NULL)
        # Raise explicitly rather than with an assert statement, so the check
        # is still performed when assertions are compiled out.
        if self._index == NULL:
            raise AssertionError('create_delta_index failed to create an'
                ' index for the first source')

    cdef _expand_sources(self):
        """Grow the source_info array; currently always refuses.

        :raise RuntimeError: always, because the resizing code below is
            intentionally disabled by the raise in front of it.
        """
        raise RuntimeError('if we move self._source_infos, then we need to'
                           ' change all of the index pointers as well.')
        self._max_num_sources = self._max_num_sources * 2
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
                                                sizeof(source_info)
                                                * self._max_num_sources)

    def make_delta(self, target_bytes, max_delta_size=0):
        """Create a delta from the current source to the target bytes.

        :param target_bytes: The text to create a delta for, must be a str.
        :param max_delta_size: Passed through to create_delta().
        :return: The delta as a str, or None if no source has been added or
            create_delta() produced no delta.
        :raise TypeError: if target_bytes is not exactly a str.
        """
        cdef char *target
        cdef Py_ssize_t target_size
        cdef void * delta
        cdef unsigned long delta_size
        cdef unsigned long c_max_delta_size

        if self._index == NULL:
            if len(self._sources) == 0:
                return None
            # We were just lazy about generating the index
            self._populate_first_index()

        if not PyString_CheckExact(target_bytes):
            raise TypeError('target is not a str')

        target = PyString_AS_STRING(target_bytes)
        target_size = PyString_GET_SIZE(target_bytes)

        # TODO: inline some of create_delta so we at least don't have to double
        #       malloc, and can instead use PyString_FromStringAndSize, to
        #       allocate the bytes into the final string
        c_max_delta_size = max_delta_size
        with nogil:
            delta = create_delta(self._index,
                                 target, target_size,
                                 &delta_size, c_max_delta_size)
        result = None
        if delta:
            try:
                result = PyString_FromStringAndSize(<char *>delta, delta_size)
            finally:
                # Release the C buffer even if building the str raised.
                free(delta)
        return result
248
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
249
250
def make_delta(source_bytes, target_bytes):
    """Create a delta that turns source_bytes into target_bytes.

    This is a convenience wrapper: it builds a throw-away DeltaIndex over
    source_bytes and returns the result of its make_delta(target_bytes).
    """
    return DeltaIndex(source_bytes).make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
254
255
256
def apply_delta(source_bytes, delta_bytes):
    """Apply a delta generated by make_delta to source_bytes.

    :param source_bytes: The text the delta was made against, must be a str.
    :param delta_bytes: The delta itself, must be a str.
    :return: Whatever _apply_delta() returns for these buffers.
    :raise TypeError: if either argument is not exactly a str.
    :raise RuntimeError: if the delta is shorter than DELTA_SIZE_MIN.
    """
    cdef char *c_source
    cdef Py_ssize_t c_source_len
    cdef char *c_delta
    cdef Py_ssize_t c_delta_len

    if not PyString_CheckExact(source_bytes):
        raise TypeError('source is not a str')
    if not PyString_CheckExact(delta_bytes):
        raise TypeError('delta is not a str')

    c_source_len = PyString_GET_SIZE(source_bytes)
    c_source = PyString_AS_STRING(source_bytes)
    c_delta_len = PyString_GET_SIZE(delta_bytes)
    c_delta = PyString_AS_STRING(delta_bytes)

    # Code taken from patch-delta.c, only brought here to give better error
    # handling, and to avoid double allocating memory
    if DELTA_SIZE_MIN > c_delta_len:
        # XXX: Invalid delta block
        raise RuntimeError('delta_size %d smaller than min delta size %d'
                           % (c_delta_len, DELTA_SIZE_MIN))

    return _apply_delta(c_source, c_source_len, c_delta, c_delta_len)
279
280
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
281
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
    unsigned char cmd, unsigned int *offset, unsigned int *length) nogil:
    """Decode a copy instruction from the next few bytes.

    A copy instruction is a variable number of bytes, so we will parse the
    bytes we care about, and return the new position, as well as the offset and
    length referred to in the bytes.

    Bits 0x01-0x08 of cmd say which of the four offset bytes are present and
    bits 0x10-0x40 say which of the three length bytes are present, least
    significant byte first. A decoded length of 0 means 0x10000.

    This function does no bounds checking: the caller must make sure that one
    byte per set bit in (cmd & 0x7F) is readable at ``bytes``.

    :param bytes: Pointer to the start of bytes after cmd
    :param cmd: The command code
    :param offset: Out parameter, receives the decoded copy offset
    :param length: Out parameter, receives the decoded copy length
    :return: Pointer to the bytes just after the last decode byte
    """
    cdef unsigned int off, size, count
    off = 0
    size = 0
    count = 0
    # Each byte is widened to unsigned int before shifting. Otherwise the
    # shift happens in (signed) int, and a byte >= 0x80 shifted by 24 would
    # overflow it.
    if (cmd & 0x01):
        off = bytes[count]
        count = count + 1
    if (cmd & 0x02):
        off = off | ((<unsigned int>bytes[count]) << 8)
        count = count + 1
    if (cmd & 0x04):
        off = off | ((<unsigned int>bytes[count]) << 16)
        count = count + 1
    if (cmd & 0x08):
        off = off | ((<unsigned int>bytes[count]) << 24)
        count = count + 1
    if (cmd & 0x10):
        size = bytes[count]
        count = count + 1
    if (cmd & 0x20):
        size = size | ((<unsigned int>bytes[count]) << 8)
        count = count + 1
    if (cmd & 0x40):
        size = size | ((<unsigned int>bytes[count]) << 16)
        count = count + 1
    if (size == 0):
        size = 0x10000
    offset[0] = off
    length[0] = size
    return bytes + count
323
324
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
325
cdef object _apply_delta(char *source, Py_ssize_t source_size,
                         char *delta, Py_ssize_t delta_size):
    """common functionality between apply_delta and apply_delta_to_source.

    :param source: Buffer that copy instructions read from.
    :param source_size: Number of bytes of ``source`` that may be copied.
    :param delta: Start of the delta (header followed by instructions).
    :param delta_size: Number of bytes in ``delta``.
    :return: A new str holding the expanded text.
    :raise ValueError: if an instruction is invalid: a copy outside the
        source or larger than the remaining output, a copy whose operand
        bytes run past the end of the delta, opcode 0, or an insert larger
        than the remaining output.
    :raise RuntimeError: if the instructions do not consume exactly the
        delta, or do not produce exactly the number of bytes announced in
        the delta header.
    """
    cdef unsigned char *data, *top
    cdef unsigned char *dst_buf, *out, cmd
    cdef unsigned char bits
    cdef Py_ssize_t size
    cdef Py_ssize_t needed
    cdef unsigned int cp_off, cp_size
    cdef int failed

    data = <unsigned char *>delta
    top = data + delta_size

    # now the result size
    size = get_delta_hdr_size(&data, top)
    result = PyString_FromStringAndSize(NULL, size)
    dst_buf = <unsigned char*>PyString_AS_STRING(result)

    # The loop runs without the GIL, so failures are recorded in 'failed' and
    # turned into exceptions afterwards.
    failed = 0
    with nogil:
        out = dst_buf
        while (data < top):
            cmd = data[0]
            data = data + 1
            if (cmd & 0x80):
                # Copy instruction
                # One operand byte follows for every bit set in the low 7
                # bits of cmd. Make sure they are all inside the delta before
                # decoding, so a truncated delta cannot make us read past it.
                needed = 0
                bits = cmd & 0x7F
                while bits:
                    needed = needed + (bits & 0x01)
                    bits = bits >> 1
                if needed > (top - data):
                    failed = 4
                    break
                data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
                if (cp_off + cp_size < cp_size or
                    cp_off + cp_size > source_size or
                    cp_size > size):
                    failed = 1
                    break
                memcpy(out, source + cp_off, cp_size)
                out = out + cp_size
                size = size - cp_size
            else:
                # Insert instruction
                if cmd == 0:
                    # cmd == 0 is reserved for future encoding
                    # extensions. In the mean time we must fail when
                    # encountering them (might be data corruption).
                    failed = 2
                    break
                if cmd > size:
                    failed = 3
                    break
                if cmd > (top - data):
                    # The delta ends before the bytes to insert do. Stop
                    # before reading past it; size is still >= cmd > 0 here,
                    # so the sanity check below reports the problem.
                    break
                memcpy(out, data, cmd)
                out = out + cmd
                data = data + cmd
                size = size - cmd
    if failed:
        if failed == 1:
            raise ValueError('Something wrong with:'
                ' cp_off = %s, cp_size = %s'
                ' source_size = %s, size = %s'
                % (cp_off, cp_size, source_size, size))
        elif failed == 2:
            raise ValueError('Got delta opcode: 0, not supported')
        elif failed == 3:
            raise ValueError('Insert instruction longer than remaining'
                ' bytes: %d > %d' % (cmd, size))
        elif failed == 4:
            raise ValueError('Copy instruction runs past the end of the'
                ' delta: needs %d bytes, only %d left'
                % (needed, <int>(top - data)))

    # sanity check
    if (data != top or size != 0):
        raise RuntimeError('Did not extract the number of bytes we expected'
            ' we were left with %d bytes in "size", and top - data = %d'
            % (size, <int>(top - data)))

    # *dst_size = out - dst_buf;
    if (out - dst_buf) != PyString_GET_SIZE(result):
        raise RuntimeError('Number of bytes extracted did not match the'
            ' size encoded in the delta header.')
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
398
399
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
400
def apply_delta_to_source(source, delta_start, delta_end):
    """Extract a delta from source bytes, and apply it.

    The delta is the slice source[delta_start:delta_end]; it is applied
    against the bytes of source that come before delta_start.

    :param source: A str holding both the source text and the delta.
    :param delta_start: Offset in source of the first byte of the delta.
    :param delta_end: Offset in source just past the last byte of the delta.
    :return: Whatever _apply_delta() returns for these buffers.
    :raise TypeError: if source is not exactly a str.
    :raise ValueError: if the offsets do not describe a non-empty range
        inside source.
    """
    cdef char *c_source
    cdef Py_ssize_t c_source_size
    cdef char *c_delta
    cdef Py_ssize_t c_delta_size
    cdef Py_ssize_t c_delta_start, c_delta_end

    if not PyString_CheckExact(source):
        raise TypeError('source is not a str')
    c_source_size = PyString_GET_SIZE(source)
    c_delta_start = delta_start
    c_delta_end = delta_end
    if c_delta_start < 0:
        # A negative start would pass the checks below and make c_delta point
        # in front of the source buffer.
        raise ValueError('delta starts before source')
    if c_delta_start >= c_source_size:
        raise ValueError('delta starts after source')
    if c_delta_end > c_source_size:
        raise ValueError('delta ends after source')
    if c_delta_start >= c_delta_end:
        raise ValueError('delta starts after it ends')

    c_delta_size = c_delta_end - c_delta_start
    c_source = PyString_AS_STRING(source)
    c_delta = c_source + c_delta_start
    # We don't use source_size, because we know the delta should not refer to
    # any bytes after it starts
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
426
427
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
428
def encode_base128_int(val):
    """Convert an integer into a 7-bit lsb encoding.

    Every output byte carries 7 bits of the value, least significant group
    first; all bytes except the last have the 0x80 bit set.

    :param val: The integer to encode (converted to a C unsigned int).
    :return: The encoded bytes as a str.
    :raise ValueError: if the encoding would not fit in the 8 byte buffer.
    """
    cdef unsigned int remaining
    cdef Py_ssize_t used
    cdef unsigned char buf[8] # max size for 32-bit int is 5 bytes

    remaining = val
    used = 0
    while used < 8 and remaining >= 0x80:
        # Emit the low 7 bits, flagged as "more bytes follow".
        buf[used] = <unsigned char>((remaining | 0x80) & 0xFF)
        remaining = remaining >> 7
        used = used + 1
    if used >= 8 or remaining >= 0x80:
        raise ValueError('encode_base128_int overflowed the buffer')
    # The final byte holds what is left and has the 0x80 bit clear.
    buf[used] = <unsigned char>(remaining & 0xFF)
    return PyString_FromStringAndSize(<char *>buf, used + 1)
446
447
448
def decode_base128_int(bytes):
    """Decode an integer from a 7-bit lsb encoding.

    Decoding starts at the first byte of ``bytes`` and stops at the first
    byte that does not have the 0x80 bit set; any bytes after that are
    ignored.

    :param bytes: A str starting with the encoded integer.
    :return: (value, offset) where offset is the number of bytes consumed.
    :raise TypeError: if bytes is not exactly a str.
    :raise ValueError: if bytes is empty, if it ends before a byte without
        the 0x80 bit is seen, or if the encoding is longer than a 32-bit
        value allows.
    """
    cdef int offset
    cdef int val
    cdef unsigned int uval
    cdef int shift
    cdef Py_ssize_t num_low_bytes
    cdef unsigned char *c_bytes

    if not PyString_CheckExact(bytes):
        raise TypeError('bytes is not a string')
    if PyString_GET_SIZE(bytes) == 0:
        # Without this we would decode the str's trailing NUL and report
        # having consumed one byte of an empty string.
        raise ValueError('Data not properly formatted, there are no bytes'
                         ' to decode.')
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
    # We take off 1, because we have to be able to decode the non-expanded byte
    num_low_bytes = PyString_GET_SIZE(bytes) - 1

    offset = 0
    shift = 0
    # Accumulate as unsigned so that filling the top bit is well defined.
    uval = 0
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
        if shift >= 32:
            raise ValueError('Data not properly formatted, the value does'
                             ' not fit in 32 bits.')
        uval = uval | ((<unsigned int>(c_bytes[offset] & 0x7F)) << shift)
        shift = shift + 7
        offset = offset + 1
    if c_bytes[offset] & 0x80:
        raise ValueError('Data not properly formatted, we ran out of'
                         ' bytes before 0x80 stopped being set.')
    if shift >= 32:
        raise ValueError('Data not properly formatted, the value does'
                         ' not fit in 32 bits.')
    uval = uval | ((<unsigned int>c_bytes[offset]) << shift)
    offset = offset + 1
    if uval > 0x7FFFFFFF:
        # Too big for a signed int, hand it back as an unsigned value.
        return uval, offset
    val = <int>uval
    return val, offset
478
479