~bzr-pqm/bzr/bzr.dev

3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
1
# Copyright (C) 2009 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
25
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
26
    int PyString_CheckExact(object)
27
    char * PyString_AS_STRING(object)
28
    Py_ssize_t PyString_GET_SIZE(object)
29
    object PyString_FromStringAndSize(char *, Py_ssize_t)
30
31
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
32
cdef extern from *:
33
    ctypedef unsigned long size_t
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
34
    void * malloc(size_t) nogil
35
    void * realloc(void *, size_t) nogil
36
    void free(void *) nogil
37
    void memcpy(void *, void *, size_t) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
38
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
39
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
41
    struct source_info:
42
        void *buf
43
        unsigned long size
44
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
45
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
46
        pass
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
47
    delta_index * create_delta_index(source_info *src, delta_index *old) nogil
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
    delta_index * create_delta_index_from_delta(source_info *delta,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
49
                                                delta_index *old) nogil
50
    void free_delta_index(delta_index *index) nogil
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
51
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
52
             void *buf, unsigned long bufsize,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
53
             unsigned long *delta_size, unsigned long max_delta_size) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
54
    unsigned long get_delta_hdr_size(unsigned char **datap,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
55
                                     unsigned char *top) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
56
    Py_ssize_t DELTA_SIZE_MIN
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
57
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
58
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
59
cdef void *safe_malloc(size_t count) except NULL:
60
    cdef void *result
61
    result = malloc(count)
62
    if result == NULL:
63
        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
64
    return result
65
66
67
cdef void *safe_realloc(void * old, size_t count) except NULL:
68
    cdef void *result
69
    result = realloc(old, count)
70
    if result == NULL:
71
        raise MemoryError('Failed to reallocate to %d bytes of memory'
72
                          % (count,))
73
    return result
74
75
76
cdef int safe_free(void **val) except -1:
77
    assert val != NULL
78
    if val[0] != NULL:
79
        free(val[0])
80
        val[0] = NULL
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
81
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
82
def make_delta_index(source):
83
    return DeltaIndex(source)
84
85
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
86
cdef class DeltaIndex:
87
0.23.40 by John Arbash Meinel
Add a comment why we aren't using the list type for _sources
88
    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
89
    # isn't performance critical
90
    # cdef readonly list _sources
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
91
    cdef readonly object _sources
0.23.42 by John Arbash Meinel
Change the code around again.
92
    cdef source_info *_source_infos
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
93
    cdef delta_index *_index
0.23.42 by John Arbash Meinel
Change the code around again.
94
    cdef readonly unsigned int _max_num_sources
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
95
    cdef public unsigned long _source_offset
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
96
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
97
    def __init__(self, source=None):
98
        self._sources = []
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
99
        self._index = NULL
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
100
        self._max_num_sources = 65000
0.23.42 by John Arbash Meinel
Change the code around again.
101
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
102
                                                        * self._max_num_sources)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
103
        self._source_offset = 0
104
105
        if source is not None:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
106
            self.add_source(source, 0)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
107
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
108
    def __repr__(self):
109
        return '%s(%d, %d)' % (self.__class__.__name__,
110
            len(self._sources), self._source_offset)
111
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
112
    def __dealloc__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
113
        if self._index != NULL:
114
            free_delta_index(self._index)
115
            self._index = NULL
0.23.42 by John Arbash Meinel
Change the code around again.
116
        safe_free(<void **>&self._source_infos)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
117
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
118
    def _has_index(self):
119
        return (self._index != NULL)
120
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
121
    def add_delta_source(self, delta, unadded_bytes):
122
        """Add a new delta to the source texts.
123
124
        :param delta: The text of the delta, this must be a byte string.
125
        :param unadded_bytes: Number of bytes that were added to the source
126
            that were not indexed.
127
        """
128
        cdef char *c_delta
129
        cdef Py_ssize_t c_delta_size
130
        cdef delta_index *index
131
        cdef unsigned int source_location
132
        cdef source_info *src
133
        cdef unsigned int num_indexes
134
135
        if not PyString_CheckExact(delta):
136
            raise TypeError('delta is not a str')
137
138
        source_location = len(self._sources)
139
        if source_location >= self._max_num_sources:
140
            self._expand_sources()
141
        self._sources.append(delta)
142
        c_delta = PyString_AS_STRING(delta)
143
        c_delta_size = PyString_GET_SIZE(delta)
144
        src = self._source_infos + source_location
145
        src.buf = c_delta
146
        src.size = c_delta_size
147
        src.agg_offset = self._source_offset + unadded_bytes
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
148
        with nogil:
149
            index = create_delta_index_from_delta(src, self._index)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
150
        self._source_offset = src.agg_offset + src.size
0.23.49 by John Arbash Meinel
When adding new entries to the delta index, use memcpy
151
        if index != NULL:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
152
            free_delta_index(self._index)
153
            self._index = index
154
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
155
    def add_source(self, source, unadded_bytes):
156
        """Add a new bit of source text to the delta indexes.
157
158
        :param source: The text in question, this must be a byte string
159
        :param unadded_bytes: Assume there are this many bytes that didn't get
160
            added between this source and the end of the previous source.
161
        """
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
162
        cdef char *c_source
163
        cdef Py_ssize_t c_source_size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
164
        cdef delta_index *index
0.23.42 by John Arbash Meinel
Change the code around again.
165
        cdef unsigned int source_location
166
        cdef source_info *src
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
167
        cdef unsigned int num_indexes
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
168
169
        if not PyString_CheckExact(source):
170
            raise TypeError('source is not a str')
171
0.23.42 by John Arbash Meinel
Change the code around again.
172
        source_location = len(self._sources)
173
        if source_location >= self._max_num_sources:
174
            self._expand_sources()
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
175
        if source_location != 0 and self._index == NULL:
176
            # We were lazy about populating the index, create it now
177
            self._populate_first_index()
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
178
        self._sources.append(source)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
179
        c_source = PyString_AS_STRING(source)
180
        c_source_size = PyString_GET_SIZE(source)
0.23.42 by John Arbash Meinel
Change the code around again.
181
        src = self._source_infos + source_location
182
        src.buf = c_source
183
        src.size = c_source_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
184
0.23.42 by John Arbash Meinel
Change the code around again.
185
        src.agg_offset = self._source_offset + unadded_bytes
186
        self._source_offset = src.agg_offset + src.size
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
187
        # We delay creating the index on the first insert
188
        if source_location != 0:
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
189
            with nogil:
190
                index = create_delta_index(src, self._index)
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
191
            if index != NULL:
192
                free_delta_index(self._index)
193
                self._index = index
194
195
    cdef _populate_first_index(self):
196
        cdef delta_index *index
197
        if len(self._sources) != 1 or self._index != NULL:
198
            raise AssertionError('_populate_first_index should only be'
199
                ' called when we have a single source and no index yet')
200
201
        # We know that self._index is already NULL, so whatever
202
        # create_delta_index returns is fine
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
203
        with nogil:
204
            self._index = create_delta_index(&self._source_infos[0], NULL)
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
205
        assert self._index != NULL
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
206
0.23.42 by John Arbash Meinel
Change the code around again.
207
    cdef _expand_sources(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
208
        raise RuntimeError('if we move self._source_infos, then we need to'
209
                           ' change all of the index pointers as well.')
0.23.42 by John Arbash Meinel
Change the code around again.
210
        self._max_num_sources = self._max_num_sources * 2
211
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
212
                                                sizeof(source_info)
213
                                                * self._max_num_sources)
214
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
215
    def make_delta(self, target_bytes, max_delta_size=0):
216
        """Create a delta from the current source to the target bytes."""
217
        cdef char *target
218
        cdef Py_ssize_t target_size
219
        cdef void * delta
220
        cdef unsigned long delta_size
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
221
        cdef unsigned long c_max_delta_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
222
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
223
        if self._index == NULL:
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
224
            if len(self._sources) == 0:
225
                return None
226
            # We were just lazy about generating the index
227
            self._populate_first_index()
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
228
229
        if not PyString_CheckExact(target_bytes):
230
            raise TypeError('target is not a str')
231
232
        target = PyString_AS_STRING(target_bytes)
233
        target_size = PyString_GET_SIZE(target_bytes)
234
235
        # TODO: inline some of create_delta so we at least don't have to double
236
        #       malloc, and can instead use PyString_FromStringAndSize, to
237
        #       allocate the bytes into the final string
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
238
        c_max_delta_size = max_delta_size
239
        with nogil:
240
            delta = create_delta(self._index,
241
                                 target, target_size,
242
                                 &delta_size, c_max_delta_size)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
243
        result = None
244
        if delta:
245
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
246
            free(delta)
247
        return result
248
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
249
250
def make_delta(source_bytes, target_bytes):
0.23.42 by John Arbash Meinel
Change the code around again.
251
    """Create a delta, this is a wrapper around DeltaIndex.make_delta."""
252
    di = DeltaIndex(source_bytes)
253
    return di.make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
254
255
256
def apply_delta(source_bytes, delta_bytes):
257
    """Apply a delta generated by make_delta to source_bytes."""
258
    cdef char *source
259
    cdef Py_ssize_t source_size
260
    cdef char *delta
261
    cdef Py_ssize_t delta_size
262
263
    if not PyString_CheckExact(source_bytes):
264
        raise TypeError('source is not a str')
265
    if not PyString_CheckExact(delta_bytes):
266
        raise TypeError('delta is not a str')
267
    source = PyString_AS_STRING(source_bytes)
268
    source_size = PyString_GET_SIZE(source_bytes)
269
    delta = PyString_AS_STRING(delta_bytes)
270
    delta_size = PyString_GET_SIZE(delta_bytes)
271
    # Code taken from patch-delta.c, only brought here to give better error
272
    # handling, and to avoid double allocating memory
273
    if (delta_size < DELTA_SIZE_MIN):
274
        # XXX: Invalid delta block
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
275
        raise RuntimeError('delta_size %d smaller than min delta size %d'
276
                           % (delta_size, DELTA_SIZE_MIN))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
277
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
278
    return _apply_delta(source, source_size, delta, delta_size)
279
280
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
281
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
4932.1.1 by John Arbash Meinel
Merge the 2.0 branch, resolve one conflict.
282
    unsigned char cmd, unsigned int *offset,
283
    unsigned int *length) nogil: # cannot_raise
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
284
    """Decode a copy instruction from the next few bytes.
285
286
    A copy instruction is a variable number of bytes, so we will parse the
287
    bytes we care about, and return the new position, as well as the offset and
288
    length referred to in the bytes.
289
290
    :param bytes: Pointer to the start of bytes after cmd
291
    :param cmd: The command code
292
    :return: Pointer to the bytes just after the last decode byte
293
    """
294
    cdef unsigned int off, size, count
295
    off = 0
296
    size = 0
297
    count = 0
298
    if (cmd & 0x01):
299
        off = bytes[count]
300
        count = count + 1
301
    if (cmd & 0x02):
302
        off = off | (bytes[count] << 8)
303
        count = count + 1
304
    if (cmd & 0x04):
305
        off = off | (bytes[count] << 16)
306
        count = count + 1
307
    if (cmd & 0x08):
308
        off = off | (bytes[count] << 24)
309
        count = count + 1
310
    if (cmd & 0x10):
311
        size = bytes[count]
312
        count = count + 1
313
    if (cmd & 0x20):
314
        size = size | (bytes[count] << 8)
315
        count = count + 1
316
    if (cmd & 0x40):
317
        size = size | (bytes[count] << 16)
318
        count = count + 1
319
    if (size == 0):
320
        size = 0x10000
321
    offset[0] = off
322
    length[0] = size
323
    return bytes + count
324
325
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
326
cdef object _apply_delta(char *source, Py_ssize_t source_size,
327
                         char *delta, Py_ssize_t delta_size):
328
    """common functionality between apply_delta and apply_delta_to_source."""
329
    cdef unsigned char *data, *top
330
    cdef unsigned char *dst_buf, *out, cmd
331
    cdef Py_ssize_t size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
332
    cdef unsigned int cp_off, cp_size
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
333
    cdef int failed
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
334
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
335
    data = <unsigned char *>delta
336
    top = data + delta_size
337
338
    # now the result size
339
    size = get_delta_hdr_size(&data, top)
340
    result = PyString_FromStringAndSize(NULL, size)
341
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
342
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
343
    failed = 0
344
    with nogil:
345
        out = dst_buf
346
        while (data < top):
347
            cmd = data[0]
348
            data = data + 1
349
            if (cmd & 0x80):
350
                # Copy instruction
351
                data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
352
                if (cp_off + cp_size < cp_size or
353
                    cp_off + cp_size > source_size or
354
                    cp_size > size):
355
                    failed = 1
356
                    break
357
                memcpy(out, source + cp_off, cp_size)
358
                out = out + cp_size
359
                size = size - cp_size
360
            else:
361
                # Insert instruction
362
                if cmd == 0:
363
                    # cmd == 0 is reserved for future encoding
364
                    # extensions. In the mean time we must fail when
365
                    # encountering them (might be data corruption).
366
                    failed = 2
367
                    break
368
                if cmd > size:
369
                    failed = 3
370
                    break
371
                memcpy(out, data, cmd)
372
                out = out + cmd
373
                data = data + cmd
374
                size = size - cmd
375
    if failed:
376
        if failed == 1:
377
            raise ValueError('Something wrong with:'
378
                ' cp_off = %s, cp_size = %s'
379
                ' source_size = %s, size = %s'
380
                % (cp_off, cp_size, source_size, size))
381
        elif failed == 2:
382
            raise ValueError('Got delta opcode: 0, not supported')
383
        elif failed == 3:
384
            raise ValueError('Insert instruction longer than remaining'
385
                ' bytes: %d > %d' % (cmd, size))
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
386
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
387
    # sanity check
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
388
    if (data != top or size != 0):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
389
        raise RuntimeError('Did not extract the number of bytes we expected'
390
            ' we were left with %d bytes in "size", and top - data = %d'
391
            % (size, <int>(top - data)))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
392
        return None
393
394
    # *dst_size = out - dst_buf;
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
395
    if (out - dst_buf) != PyString_GET_SIZE(result):
396
        raise RuntimeError('Number of bytes extracted did not match the'
397
            ' size encoded in the delta header.')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
398
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
399
400
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
401
def apply_delta_to_source(source, delta_start, delta_end):
402
    """Extract a delta from source bytes, and apply it."""
403
    cdef char *c_source
404
    cdef Py_ssize_t c_source_size
405
    cdef char *c_delta
406
    cdef Py_ssize_t c_delta_size
407
    cdef Py_ssize_t c_delta_start, c_delta_end
408
409
    if not PyString_CheckExact(source):
410
        raise TypeError('source is not a str')
411
    c_source_size = PyString_GET_SIZE(source)
412
    c_delta_start = delta_start
413
    c_delta_end = delta_end
414
    if c_delta_start >= c_source_size:
415
        raise ValueError('delta starts after source')
416
    if c_delta_end > c_source_size:
417
        raise ValueError('delta ends after source')
418
    if c_delta_start >= c_delta_end:
419
        raise ValueError('delta starts after it ends')
420
421
    c_delta_size = c_delta_end - c_delta_start
422
    c_source = PyString_AS_STRING(source)
423
    c_delta = c_source + c_delta_start
424
    # We don't use source_size, because we know the delta should not refer to
425
    # any bytes after it starts
426
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
427
428
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
429
def encode_base128_int(val):
430
    """Convert an integer into a 7-bit lsb encoding."""
431
    cdef unsigned int c_val
432
    cdef Py_ssize_t count
433
    cdef unsigned int num_bytes
434
    cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes
435
436
    c_val = val
437
    count = 0
438
    while c_val >= 0x80 and count < 8:
439
        c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
440
        c_val = c_val >> 7
441
        count = count + 1
442
    if count >= 8 or c_val >= 0x80:
443
        raise ValueError('encode_base128_int overflowed the buffer')
444
    c_bytes[count] = <unsigned char>(c_val & 0xFF)
445
    count = count + 1
446
    return PyString_FromStringAndSize(<char *>c_bytes, count)
447
448
449
def decode_base128_int(bytes):
450
    """Decode an integer from a 7-bit lsb encoding."""
451
    cdef int offset
452
    cdef int val
453
    cdef unsigned int uval
454
    cdef int shift
455
    cdef Py_ssize_t num_low_bytes
456
    cdef unsigned char *c_bytes
457
458
    offset = 0
459
    val = 0
460
    shift = 0
461
    if not PyString_CheckExact(bytes):
462
        raise TypeError('bytes is not a string')
463
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
464
    # We take off 1, because we have to be able to decode the non-expanded byte
465
    num_low_bytes = PyString_GET_SIZE(bytes) - 1
466
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
467
        val = val | ((c_bytes[offset] & 0x7F) << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
468
        shift = shift + 7
469
        offset = offset + 1
470
    if c_bytes[offset] & 0x80:
471
        raise ValueError('Data not properly formatted, we ran out of'
472
                         ' bytes before 0x80 stopped being set.')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
473
    val = val | (c_bytes[offset] << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
474
    offset = offset + 1
475
    if val < 0:
476
        uval = <unsigned int> val
477
        return uval, offset
478
    return val, offset
479
480