~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
25
    ctypedef struct PyObject:
26
        pass
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
27
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
28
    int PyString_CheckExact(object)
29
    char * PyString_AS_STRING(object)
30
    Py_ssize_t PyString_GET_SIZE(object)
31
    object PyString_FromStringAndSize(char *, Py_ssize_t)
32
33
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
34
cdef extern from *:
35
    ctypedef unsigned long size_t
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
36
    void * malloc(size_t) nogil
37
    void * realloc(void *, size_t) nogil
38
    void free(void *) nogil
39
    void memcpy(void *, void *, size_t) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
41
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
42
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
43
    struct source_info:
44
        void *buf
45
        unsigned long size
46
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
47
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
        pass
5698.2.10 by Martin
Add ctypedef marker which Cython may need though Pyrex does not
49
    ctypedef enum delta_result:
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
50
        DELTA_OK
51
        DELTA_OUT_OF_MEMORY
52
        DELTA_INDEX_NEEDED
53
        DELTA_SOURCE_EMPTY
54
        DELTA_SOURCE_BAD
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
55
        DELTA_BUFFER_EMPTY
56
        DELTA_SIZE_TOO_BIG
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
57
    delta_result create_delta_index(source_info *src,
58
                                    delta_index *old,
59
                                    delta_index **fresh) nogil
60
    delta_result create_delta_index_from_delta(source_info *delta,
61
                                               delta_index *old,
62
                                               delta_index **fresh) nogil
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
63
    void free_delta_index(delta_index *index) nogil
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
64
    delta_result create_delta(delta_index *indexes,
65
                              void *buf, unsigned long bufsize,
66
                              unsigned long *delta_size,
67
                              unsigned long max_delta_size,
68
                              void **delta_data) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
69
    unsigned long get_delta_hdr_size(unsigned char **datap,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
70
                                     unsigned char *top) nogil
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
71
    unsigned long sizeof_delta_index(delta_index *index)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
72
    Py_ssize_t DELTA_SIZE_MIN
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
73
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
74
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
75
cdef void *safe_malloc(size_t count) except NULL:
76
    cdef void *result
77
    result = malloc(count)
78
    if result == NULL:
79
        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
80
    return result
81
82
83
cdef void *safe_realloc(void * old, size_t count) except NULL:
84
    cdef void *result
85
    result = realloc(old, count)
86
    if result == NULL:
87
        raise MemoryError('Failed to reallocate to %d bytes of memory'
88
                          % (count,))
89
    return result
90
91
92
cdef int safe_free(void **val) except -1:
93
    assert val != NULL
94
    if val[0] != NULL:
95
        free(val[0])
96
        val[0] = NULL
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
97
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
98
def make_delta_index(source):
99
    return DeltaIndex(source)
100
101
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
102
cdef object _translate_delta_failure(delta_result result):
103
    if result == DELTA_OUT_OF_MEMORY:
104
        return MemoryError("Delta function failed to allocate memory")
105
    elif result == DELTA_INDEX_NEEDED:
106
        return ValueError("Delta function requires delta_index param")
107
    elif result == DELTA_SOURCE_EMPTY:
108
        return ValueError("Delta function given empty source_info param")
109
    elif result == DELTA_SOURCE_BAD:
110
        return RuntimeError("Delta function given invalid source_info param")
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
111
    elif result == DELTA_BUFFER_EMPTY:
112
        return ValueError("Delta function given empty buffer params")
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
113
    return AssertionError("Unrecognised delta result code: %d" % result)
114
115
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
116
cdef class DeltaIndex:
117
0.23.40 by John Arbash Meinel
Add a comment why we aren't using the list type for _sources
118
    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
119
    # isn't performance critical
120
    # cdef readonly list _sources
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
121
    cdef readonly object _sources
0.23.42 by John Arbash Meinel
Change the code around again.
122
    cdef source_info *_source_infos
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
123
    cdef delta_index *_index
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
124
    cdef public unsigned long _source_offset
0.23.42 by John Arbash Meinel
Change the code around again.
125
    cdef readonly unsigned int _max_num_sources
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
126
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
127
    def __init__(self, source=None):
128
        self._sources = []
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
129
        self._index = NULL
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
130
        self._max_num_sources = 65000
0.23.42 by John Arbash Meinel
Change the code around again.
131
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
132
                                                        * self._max_num_sources)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
133
        self._source_offset = 0
134
135
        if source is not None:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
136
            self.add_source(source, 0)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
137
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
138
    def __sizeof__(self):
139
        # We want to track the _source_infos allocations, but the referenced
140
        # void* are actually tracked in _sources itself.
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
141
        # XXX: Cython is capable of doing sizeof(class) and returning the size
142
        #      of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
143
        #      to do it manually. *sigh* Note that we might get it wrong
144
        #      because of alignment issues.
145
        cdef Py_ssize_t size
146
        # PyObject start, vtable *, 3 object pointers, 2 C ints
147
        size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
148
                 + sizeof(unsigned long)
149
                 + sizeof(unsigned int))
150
                + (sizeof(source_info) * self._max_num_sources)
151
                + sizeof_delta_index(self._index))
152
        return size
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
153
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
154
    def __repr__(self):
155
        return '%s(%d, %d)' % (self.__class__.__name__,
156
            len(self._sources), self._source_offset)
157
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
158
    def __dealloc__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
159
        if self._index != NULL:
160
            free_delta_index(self._index)
161
            self._index = NULL
0.23.42 by John Arbash Meinel
Change the code around again.
162
        safe_free(<void **>&self._source_infos)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
163
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
164
    def _has_index(self):
165
        return (self._index != NULL)
166
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
167
    def add_delta_source(self, delta, unadded_bytes):
168
        """Add a new delta to the source texts.
169
170
        :param delta: The text of the delta, this must be a byte string.
171
        :param unadded_bytes: Number of bytes that were added to the source
172
            that were not indexed.
173
        """
174
        cdef char *c_delta
175
        cdef Py_ssize_t c_delta_size
176
        cdef delta_index *index
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
177
        cdef delta_result res
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
178
        cdef unsigned int source_location
179
        cdef source_info *src
180
        cdef unsigned int num_indexes
181
182
        if not PyString_CheckExact(delta):
183
            raise TypeError('delta is not a str')
184
185
        source_location = len(self._sources)
186
        if source_location >= self._max_num_sources:
187
            self._expand_sources()
188
        self._sources.append(delta)
189
        c_delta = PyString_AS_STRING(delta)
190
        c_delta_size = PyString_GET_SIZE(delta)
191
        src = self._source_infos + source_location
192
        src.buf = c_delta
193
        src.size = c_delta_size
194
        src.agg_offset = self._source_offset + unadded_bytes
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
195
        with nogil:
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
196
            res = create_delta_index_from_delta(src, self._index, &index)
197
        if res != DELTA_OK:
198
            raise _translate_delta_failure(res)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
199
        self._source_offset = src.agg_offset + src.size
5698.2.3 by Martin
Change create_delta_index_from_delta too so NULL can be treated as MemoryError
200
        if index != self._index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
201
            free_delta_index(self._index)
202
            self._index = index
203
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
204
    def add_source(self, source, unadded_bytes):
205
        """Add a new bit of source text to the delta indexes.
206
207
        :param source: The text in question, this must be a byte string
208
        :param unadded_bytes: Assume there are this many bytes that didn't get
209
            added between this source and the end of the previous source.
210
        """
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
211
        cdef char *c_source
212
        cdef Py_ssize_t c_source_size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
213
        cdef delta_index *index
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
214
        cdef delta_result res
0.23.42 by John Arbash Meinel
Change the code around again.
215
        cdef unsigned int source_location
216
        cdef source_info *src
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
217
        cdef unsigned int num_indexes
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
218
219
        if not PyString_CheckExact(source):
220
            raise TypeError('source is not a str')
221
0.23.42 by John Arbash Meinel
Change the code around again.
222
        source_location = len(self._sources)
223
        if source_location >= self._max_num_sources:
224
            self._expand_sources()
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
225
        if source_location != 0 and self._index == NULL:
226
            # We were lazy about populating the index, create it now
227
            self._populate_first_index()
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
228
        self._sources.append(source)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
229
        c_source = PyString_AS_STRING(source)
230
        c_source_size = PyString_GET_SIZE(source)
0.23.42 by John Arbash Meinel
Change the code around again.
231
        src = self._source_infos + source_location
232
        src.buf = c_source
233
        src.size = c_source_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
234
0.23.42 by John Arbash Meinel
Change the code around again.
235
        src.agg_offset = self._source_offset + unadded_bytes
236
        self._source_offset = src.agg_offset + src.size
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
237
        # We delay creating the index on the first insert
238
        if source_location != 0:
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
239
            with nogil:
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
240
                res = create_delta_index(src, self._index, &index)
241
            if res != DELTA_OK:
242
                raise _translate_delta_failure(res)
5698.2.2 by Martin
Change create_delta_index signature so callers can treat NULL returns as MemoryError
243
            if index != self._index:
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
244
                free_delta_index(self._index)
245
                self._index = index
246
247
    cdef _populate_first_index(self):
248
        cdef delta_index *index
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
249
        cdef delta_result res
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
250
        if len(self._sources) != 1 or self._index != NULL:
251
            raise AssertionError('_populate_first_index should only be'
252
                ' called when we have a single source and no index yet')
253
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
254
        # We know that self._index is already NULL, so create_delta_index
255
        # will always create a new index unless there's a malloc failure
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
256
        with nogil:
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
257
            res = create_delta_index(&self._source_infos[0], NULL, &index)
258
        if res != DELTA_OK:
259
            raise _translate_delta_failure(res)
260
        self._index = index
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
261
0.23.42 by John Arbash Meinel
Change the code around again.
262
    cdef _expand_sources(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
263
        raise RuntimeError('if we move self._source_infos, then we need to'
264
                           ' change all of the index pointers as well.')
0.23.42 by John Arbash Meinel
Change the code around again.
265
        self._max_num_sources = self._max_num_sources * 2
266
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
267
                                                sizeof(source_info)
268
                                                * self._max_num_sources)
269
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
270
    def make_delta(self, target_bytes, max_delta_size=0):
271
        """Create a delta from the current source to the target bytes."""
272
        cdef char *target
273
        cdef Py_ssize_t target_size
274
        cdef void * delta
275
        cdef unsigned long delta_size
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
276
        cdef unsigned long c_max_delta_size
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
277
        cdef delta_result res
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
278
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
279
        if self._index == NULL:
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
280
            if len(self._sources) == 0:
281
                return None
282
            # We were just lazy about generating the index
283
            self._populate_first_index()
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
284
285
        if not PyString_CheckExact(target_bytes):
286
            raise TypeError('target is not a str')
287
288
        target = PyString_AS_STRING(target_bytes)
289
        target_size = PyString_GET_SIZE(target_bytes)
290
291
        # TODO: inline some of create_delta so we at least don't have to double
292
        #       malloc, and can instead use PyString_FromStringAndSize, to
293
        #       allocate the bytes into the final string
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
294
        c_max_delta_size = max_delta_size
295
        with nogil:
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
296
            res = create_delta(self._index, target, target_size,
297
                               &delta_size, c_max_delta_size, &delta)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
298
        result = None
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
299
        if res == DELTA_OK:
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
300
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
301
            free(delta)
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
302
        elif res != DELTA_SIZE_TOO_BIG:
303
            raise _translate_delta_failure(res)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
304
        return result
305
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
306
307
def make_delta(source_bytes, target_bytes):
0.23.42 by John Arbash Meinel
Change the code around again.
308
    """Create a delta, this is a wrapper around DeltaIndex.make_delta."""
309
    di = DeltaIndex(source_bytes)
310
    return di.make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
311
312
313
def apply_delta(source_bytes, delta_bytes):
314
    """Apply a delta generated by make_delta to source_bytes."""
315
    cdef char *source
316
    cdef Py_ssize_t source_size
317
    cdef char *delta
318
    cdef Py_ssize_t delta_size
319
320
    if not PyString_CheckExact(source_bytes):
321
        raise TypeError('source is not a str')
322
    if not PyString_CheckExact(delta_bytes):
323
        raise TypeError('delta is not a str')
324
    source = PyString_AS_STRING(source_bytes)
325
    source_size = PyString_GET_SIZE(source_bytes)
326
    delta = PyString_AS_STRING(delta_bytes)
327
    delta_size = PyString_GET_SIZE(delta_bytes)
328
    # Code taken from patch-delta.c, only brought here to give better error
329
    # handling, and to avoid double allocating memory
330
    if (delta_size < DELTA_SIZE_MIN):
331
        # XXX: Invalid delta block
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
332
        raise RuntimeError('delta_size %d smaller than min delta size %d'
333
                           % (delta_size, DELTA_SIZE_MIN))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
334
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
335
    return _apply_delta(source, source_size, delta, delta_size)
336
337
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
338
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
4932.1.1 by John Arbash Meinel
Merge the 2.0 branch, resolve one conflict.
339
    unsigned char cmd, unsigned int *offset,
340
    unsigned int *length) nogil: # cannot_raise
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
341
    """Decode a copy instruction from the next few bytes.
342
343
    A copy instruction is a variable number of bytes, so we will parse the
344
    bytes we care about, and return the new position, as well as the offset and
345
    length referred to in the bytes.
346
347
    :param bytes: Pointer to the start of bytes after cmd
348
    :param cmd: The command code
349
    :return: Pointer to the bytes just after the last decode byte
350
    """
351
    cdef unsigned int off, size, count
352
    off = 0
353
    size = 0
354
    count = 0
355
    if (cmd & 0x01):
356
        off = bytes[count]
357
        count = count + 1
358
    if (cmd & 0x02):
359
        off = off | (bytes[count] << 8)
360
        count = count + 1
361
    if (cmd & 0x04):
362
        off = off | (bytes[count] << 16)
363
        count = count + 1
364
    if (cmd & 0x08):
365
        off = off | (bytes[count] << 24)
366
        count = count + 1
367
    if (cmd & 0x10):
368
        size = bytes[count]
369
        count = count + 1
370
    if (cmd & 0x20):
371
        size = size | (bytes[count] << 8)
372
        count = count + 1
373
    if (cmd & 0x40):
374
        size = size | (bytes[count] << 16)
375
        count = count + 1
376
    if (size == 0):
377
        size = 0x10000
378
    offset[0] = off
379
    length[0] = size
380
    return bytes + count
381
382
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
383
cdef object _apply_delta(char *source, Py_ssize_t source_size,
384
                         char *delta, Py_ssize_t delta_size):
385
    """common functionality between apply_delta and apply_delta_to_source."""
386
    cdef unsigned char *data, *top
387
    cdef unsigned char *dst_buf, *out, cmd
388
    cdef Py_ssize_t size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
389
    cdef unsigned int cp_off, cp_size
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
390
    cdef int failed
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
391
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
392
    data = <unsigned char *>delta
393
    top = data + delta_size
394
395
    # now the result size
396
    size = get_delta_hdr_size(&data, top)
397
    result = PyString_FromStringAndSize(NULL, size)
398
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
399
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
400
    failed = 0
401
    with nogil:
402
        out = dst_buf
403
        while (data < top):
404
            cmd = data[0]
405
            data = data + 1
406
            if (cmd & 0x80):
407
                # Copy instruction
408
                data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
409
                if (cp_off + cp_size < cp_size or
5698.2.4 by Martin
Make Py_ssize_t to uint downcast explict in _groupcompress_pyx to hush compiler warnings
410
                    cp_off + cp_size > <unsigned int>source_size or
411
                    cp_size > <unsigned int>size):
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
412
                    failed = 1
413
                    break
414
                memcpy(out, source + cp_off, cp_size)
415
                out = out + cp_size
416
                size = size - cp_size
417
            else:
418
                # Insert instruction
419
                if cmd == 0:
420
                    # cmd == 0 is reserved for future encoding
421
                    # extensions. In the mean time we must fail when
422
                    # encountering them (might be data corruption).
423
                    failed = 2
424
                    break
425
                if cmd > size:
426
                    failed = 3
427
                    break
428
                memcpy(out, data, cmd)
429
                out = out + cmd
430
                data = data + cmd
431
                size = size - cmd
432
    if failed:
433
        if failed == 1:
434
            raise ValueError('Something wrong with:'
435
                ' cp_off = %s, cp_size = %s'
436
                ' source_size = %s, size = %s'
437
                % (cp_off, cp_size, source_size, size))
438
        elif failed == 2:
439
            raise ValueError('Got delta opcode: 0, not supported')
440
        elif failed == 3:
441
            raise ValueError('Insert instruction longer than remaining'
442
                ' bytes: %d > %d' % (cmd, size))
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
443
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
444
    # sanity check
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
445
    if (data != top or size != 0):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
446
        raise RuntimeError('Did not extract the number of bytes we expected'
447
            ' we were left with %d bytes in "size", and top - data = %d'
448
            % (size, <int>(top - data)))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
449
        return None
450
451
    # *dst_size = out - dst_buf;
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
452
    if (out - dst_buf) != PyString_GET_SIZE(result):
453
        raise RuntimeError('Number of bytes extracted did not match the'
454
            ' size encoded in the delta header.')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
455
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
456
457
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
458
def apply_delta_to_source(source, delta_start, delta_end):
459
    """Extract a delta from source bytes, and apply it."""
460
    cdef char *c_source
461
    cdef Py_ssize_t c_source_size
462
    cdef char *c_delta
463
    cdef Py_ssize_t c_delta_size
464
    cdef Py_ssize_t c_delta_start, c_delta_end
465
466
    if not PyString_CheckExact(source):
467
        raise TypeError('source is not a str')
468
    c_source_size = PyString_GET_SIZE(source)
469
    c_delta_start = delta_start
470
    c_delta_end = delta_end
471
    if c_delta_start >= c_source_size:
472
        raise ValueError('delta starts after source')
473
    if c_delta_end > c_source_size:
474
        raise ValueError('delta ends after source')
475
    if c_delta_start >= c_delta_end:
476
        raise ValueError('delta starts after it ends')
477
478
    c_delta_size = c_delta_end - c_delta_start
479
    c_source = PyString_AS_STRING(source)
480
    c_delta = c_source + c_delta_start
481
    # We don't use source_size, because we know the delta should not refer to
482
    # any bytes after it starts
483
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
484
485
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
486
def encode_base128_int(val):
487
    """Convert an integer into a 7-bit lsb encoding."""
488
    cdef unsigned int c_val
489
    cdef Py_ssize_t count
490
    cdef unsigned int num_bytes
491
    cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes
492
493
    c_val = val
494
    count = 0
495
    while c_val >= 0x80 and count < 8:
496
        c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
497
        c_val = c_val >> 7
498
        count = count + 1
499
    if count >= 8 or c_val >= 0x80:
500
        raise ValueError('encode_base128_int overflowed the buffer')
501
    c_bytes[count] = <unsigned char>(c_val & 0xFF)
502
    count = count + 1
503
    return PyString_FromStringAndSize(<char *>c_bytes, count)
504
505
506
def decode_base128_int(bytes):
507
    """Decode an integer from a 7-bit lsb encoding."""
508
    cdef int offset
509
    cdef int val
510
    cdef unsigned int uval
511
    cdef int shift
512
    cdef Py_ssize_t num_low_bytes
513
    cdef unsigned char *c_bytes
514
515
    offset = 0
516
    val = 0
517
    shift = 0
518
    if not PyString_CheckExact(bytes):
519
        raise TypeError('bytes is not a string')
520
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
521
    # We take off 1, because we have to be able to decode the non-expanded byte
522
    num_low_bytes = PyString_GET_SIZE(bytes) - 1
523
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
524
        val = val | ((c_bytes[offset] & 0x7F) << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
525
        shift = shift + 7
526
        offset = offset + 1
527
    if c_bytes[offset] & 0x80:
528
        raise ValueError('Data not properly formatted, we ran out of'
529
                         ' bytes before 0x80 stopped being set.')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
530
    val = val | (c_bytes[offset] << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
531
    offset = offset + 1
532
    if val < 0:
533
        uval = <unsigned int> val
534
        return uval, offset
535
    return val, offset
536
537