~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
3
# This program is free software; you can redistribute it and/or modify
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
7
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.4 by John Arbash Meinel
Fix the GPL and copyright statements in the pyrex files
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
16
17
"""Compiled extensions for doing compression."""
18
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
19
#python2.4 support
20
cdef extern from "python-compat.h":
4265.1.3 by John Arbash Meinel
restore the old Py_ssize_t import in the pyrex files.
21
    pass
22
23
24
cdef extern from "Python.h":
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
25
    ctypedef struct PyObject:
26
        pass
4265.1.1 by John Arbash Meinel
Merge the a couple rev older brisbane-core into bzr.dev, most things are resolve in favor of bzr.dev
27
    ctypedef int Py_ssize_t # Required for older pyrex versions
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
28
    int PyString_CheckExact(object)
29
    char * PyString_AS_STRING(object)
30
    Py_ssize_t PyString_GET_SIZE(object)
31
    object PyString_FromStringAndSize(char *, Py_ssize_t)
32
33
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
34
cdef extern from *:
35
    ctypedef unsigned long size_t
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
36
    void * malloc(size_t) nogil
37
    void * realloc(void *, size_t) nogil
38
    void free(void *) nogil
39
    void memcpy(void *, void *, size_t) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
40
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
41
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
42
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
43
    struct source_info:
44
        void *buf
45
        unsigned long size
46
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
47
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
48
        pass
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
49
    delta_index * create_delta_index(source_info *src, delta_index *old) nogil
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
50
    delta_index * create_delta_index_from_delta(source_info *delta,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
51
                                                delta_index *old) nogil
52
    void free_delta_index(delta_index *index) nogil
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
53
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
54
             void *buf, unsigned long bufsize,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
55
             unsigned long *delta_size, unsigned long max_delta_size) nogil
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
56
    unsigned long get_delta_hdr_size(unsigned char **datap,
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
57
                                     unsigned char *top) nogil
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
58
    unsigned long sizeof_delta_index(delta_index *index)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
59
    Py_ssize_t DELTA_SIZE_MIN
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
60
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
61
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
62
cdef void *safe_malloc(size_t count) except NULL:
63
    cdef void *result
64
    result = malloc(count)
65
    if result == NULL:
66
        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
67
    return result
68
69
70
cdef void *safe_realloc(void * old, size_t count) except NULL:
71
    cdef void *result
72
    result = realloc(old, count)
73
    if result == NULL:
74
        raise MemoryError('Failed to reallocate to %d bytes of memory'
75
                          % (count,))
76
    return result
77
78
79
cdef int safe_free(void **val) except -1:
80
    assert val != NULL
81
    if val[0] != NULL:
82
        free(val[0])
83
        val[0] = NULL
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
84
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
85
def make_delta_index(source):
86
    return DeltaIndex(source)
87
88
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
89
cdef class DeltaIndex:
90
0.23.40 by John Arbash Meinel
Add a comment why we aren't using the list type for _sources
91
    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
92
    # isn't performance critical
93
    # cdef readonly list _sources
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
94
    cdef readonly object _sources
0.23.42 by John Arbash Meinel
Change the code around again.
95
    cdef source_info *_source_infos
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
96
    cdef delta_index *_index
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
97
    cdef public unsigned long _source_offset
0.23.42 by John Arbash Meinel
Change the code around again.
98
    cdef readonly unsigned int _max_num_sources
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
99
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
100
    def __init__(self, source=None):
101
        self._sources = []
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
102
        self._index = NULL
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
103
        self._max_num_sources = 65000
0.23.42 by John Arbash Meinel
Change the code around again.
104
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
105
                                                        * self._max_num_sources)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
106
        self._source_offset = 0
107
108
        if source is not None:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
109
            self.add_source(source, 0)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
110
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
111
    def __sizeof__(self):
112
        # We want to track the _source_infos allocations, but the referenced
113
        # void* are actually tracked in _sources itself.
5361.2.5 by John Arbash Meinel
Pyrex doesn't allow sizeof(class), so we have to unroll it manually.
114
        # XXX: Cython is capable of doing sizeof(class) and returning the size
115
        #      of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
116
        #      to do it manually. *sigh* Note that we might get it wrong
117
        #      because of alignment issues.
118
        cdef Py_ssize_t size
119
        # PyObject start, vtable *, 3 object pointers, 2 C ints
120
        size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
121
                 + sizeof(unsigned long)
122
                 + sizeof(unsigned int))
123
                + (sizeof(source_info) * self._max_num_sources)
124
                + sizeof_delta_index(self._index))
125
        return size
5361.2.3 by John Arbash Meinel
Add a __sizeof__ member for DeltaIndex.
126
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
127
    def __repr__(self):
128
        return '%s(%d, %d)' % (self.__class__.__name__,
129
            len(self._sources), self._source_offset)
130
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
131
    def __dealloc__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
132
        if self._index != NULL:
133
            free_delta_index(self._index)
134
            self._index = NULL
0.23.42 by John Arbash Meinel
Change the code around again.
135
        safe_free(<void **>&self._source_infos)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
136
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
137
    def _has_index(self):
138
        return (self._index != NULL)
139
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
140
    def add_delta_source(self, delta, unadded_bytes):
141
        """Add a new delta to the source texts.
142
143
        :param delta: The text of the delta, this must be a byte string.
144
        :param unadded_bytes: Number of bytes that were added to the source
145
            that were not indexed.
146
        """
147
        cdef char *c_delta
148
        cdef Py_ssize_t c_delta_size
149
        cdef delta_index *index
150
        cdef unsigned int source_location
151
        cdef source_info *src
152
        cdef unsigned int num_indexes
153
154
        if not PyString_CheckExact(delta):
155
            raise TypeError('delta is not a str')
156
157
        source_location = len(self._sources)
158
        if source_location >= self._max_num_sources:
159
            self._expand_sources()
160
        self._sources.append(delta)
161
        c_delta = PyString_AS_STRING(delta)
162
        c_delta_size = PyString_GET_SIZE(delta)
163
        src = self._source_infos + source_location
164
        src.buf = c_delta
165
        src.size = c_delta_size
166
        src.agg_offset = self._source_offset + unadded_bytes
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
167
        with nogil:
168
            index = create_delta_index_from_delta(src, self._index)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
169
        self._source_offset = src.agg_offset + src.size
0.23.49 by John Arbash Meinel
When adding new entries to the delta index, use memcpy
170
        if index != NULL:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
171
            free_delta_index(self._index)
172
            self._index = index
173
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
174
    def add_source(self, source, unadded_bytes):
175
        """Add a new bit of source text to the delta indexes.
176
177
        :param source: The text in question, this must be a byte string
178
        :param unadded_bytes: Assume there are this many bytes that didn't get
179
            added between this source and the end of the previous source.
180
        """
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
181
        cdef char *c_source
182
        cdef Py_ssize_t c_source_size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
183
        cdef delta_index *index
0.23.42 by John Arbash Meinel
Change the code around again.
184
        cdef unsigned int source_location
185
        cdef source_info *src
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
186
        cdef unsigned int num_indexes
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
187
188
        if not PyString_CheckExact(source):
189
            raise TypeError('source is not a str')
190
0.23.42 by John Arbash Meinel
Change the code around again.
191
        source_location = len(self._sources)
192
        if source_location >= self._max_num_sources:
193
            self._expand_sources()
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
194
        if source_location != 0 and self._index == NULL:
195
            # We were lazy about populating the index, create it now
196
            self._populate_first_index()
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
197
        self._sources.append(source)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
198
        c_source = PyString_AS_STRING(source)
199
        c_source_size = PyString_GET_SIZE(source)
0.23.42 by John Arbash Meinel
Change the code around again.
200
        src = self._source_infos + source_location
201
        src.buf = c_source
202
        src.size = c_source_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
203
0.23.42 by John Arbash Meinel
Change the code around again.
204
        src.agg_offset = self._source_offset + unadded_bytes
205
        self._source_offset = src.agg_offset + src.size
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
206
        # We delay creating the index on the first insert
207
        if source_location != 0:
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
208
            with nogil:
209
                index = create_delta_index(src, self._index)
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
210
            if index != NULL:
211
                free_delta_index(self._index)
212
                self._index = index
213
214
    cdef _populate_first_index(self):
215
        cdef delta_index *index
216
        if len(self._sources) != 1 or self._index != NULL:
217
            raise AssertionError('_populate_first_index should only be'
218
                ' called when we have a single source and no index yet')
219
220
        # We know that self._index is already NULL, so whatever
221
        # create_delta_index returns is fine
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
222
        with nogil:
223
            self._index = create_delta_index(&self._source_infos[0], NULL)
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
224
        assert self._index != NULL
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
225
0.23.42 by John Arbash Meinel
Change the code around again.
226
    cdef _expand_sources(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
227
        raise RuntimeError('if we move self._source_infos, then we need to'
228
                           ' change all of the index pointers as well.')
0.23.42 by John Arbash Meinel
Change the code around again.
229
        self._max_num_sources = self._max_num_sources * 2
230
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
231
                                                sizeof(source_info)
232
                                                * self._max_num_sources)
233
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
234
    def make_delta(self, target_bytes, max_delta_size=0):
235
        """Create a delta from the current source to the target bytes."""
236
        cdef char *target
237
        cdef Py_ssize_t target_size
238
        cdef void * delta
239
        cdef unsigned long delta_size
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
240
        cdef unsigned long c_max_delta_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
241
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
242
        if self._index == NULL:
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
243
            if len(self._sources) == 0:
244
                return None
245
            # We were just lazy about generating the index
246
            self._populate_first_index()
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
247
248
        if not PyString_CheckExact(target_bytes):
249
            raise TypeError('target is not a str')
250
251
        target = PyString_AS_STRING(target_bytes)
252
        target_size = PyString_GET_SIZE(target_bytes)
253
254
        # TODO: inline some of create_delta so we at least don't have to double
255
        #       malloc, and can instead use PyString_FromStringAndSize, to
256
        #       allocate the bytes into the final string
4788.2.1 by John Arbash Meinel
Wrap the core groupcompress.create_delta calls with 'with nogil' statements.
257
        c_max_delta_size = max_delta_size
258
        with nogil:
259
            delta = create_delta(self._index,
260
                                 target, target_size,
261
                                 &delta_size, c_max_delta_size)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
262
        result = None
263
        if delta:
264
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
265
            free(delta)
266
        return result
267
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
268
269
def make_delta(source_bytes, target_bytes):
0.23.42 by John Arbash Meinel
Change the code around again.
270
    """Create a delta, this is a wrapper around DeltaIndex.make_delta."""
271
    di = DeltaIndex(source_bytes)
272
    return di.make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
273
274
275
def apply_delta(source_bytes, delta_bytes):
276
    """Apply a delta generated by make_delta to source_bytes."""
277
    cdef char *source
278
    cdef Py_ssize_t source_size
279
    cdef char *delta
280
    cdef Py_ssize_t delta_size
281
282
    if not PyString_CheckExact(source_bytes):
283
        raise TypeError('source is not a str')
284
    if not PyString_CheckExact(delta_bytes):
285
        raise TypeError('delta is not a str')
286
    source = PyString_AS_STRING(source_bytes)
287
    source_size = PyString_GET_SIZE(source_bytes)
288
    delta = PyString_AS_STRING(delta_bytes)
289
    delta_size = PyString_GET_SIZE(delta_bytes)
290
    # Code taken from patch-delta.c, only brought here to give better error
291
    # handling, and to avoid double allocating memory
292
    if (delta_size < DELTA_SIZE_MIN):
293
        # XXX: Invalid delta block
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
294
        raise RuntimeError('delta_size %d smaller than min delta size %d'
295
                           % (delta_size, DELTA_SIZE_MIN))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
296
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
297
    return _apply_delta(source, source_size, delta, delta_size)
298
299
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
300
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
4932.1.1 by John Arbash Meinel
Merge the 2.0 branch, resolve one conflict.
301
    unsigned char cmd, unsigned int *offset,
302
    unsigned int *length) nogil: # cannot_raise
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
303
    """Decode a copy instruction from the next few bytes.
304
305
    A copy instruction is a variable number of bytes, so we will parse the
306
    bytes we care about, and return the new position, as well as the offset and
307
    length referred to in the bytes.
308
309
    :param bytes: Pointer to the start of bytes after cmd
310
    :param cmd: The command code
311
    :return: Pointer to the bytes just after the last decode byte
312
    """
313
    cdef unsigned int off, size, count
314
    off = 0
315
    size = 0
316
    count = 0
317
    if (cmd & 0x01):
318
        off = bytes[count]
319
        count = count + 1
320
    if (cmd & 0x02):
321
        off = off | (bytes[count] << 8)
322
        count = count + 1
323
    if (cmd & 0x04):
324
        off = off | (bytes[count] << 16)
325
        count = count + 1
326
    if (cmd & 0x08):
327
        off = off | (bytes[count] << 24)
328
        count = count + 1
329
    if (cmd & 0x10):
330
        size = bytes[count]
331
        count = count + 1
332
    if (cmd & 0x20):
333
        size = size | (bytes[count] << 8)
334
        count = count + 1
335
    if (cmd & 0x40):
336
        size = size | (bytes[count] << 16)
337
        count = count + 1
338
    if (size == 0):
339
        size = 0x10000
340
    offset[0] = off
341
    length[0] = size
342
    return bytes + count
343
344
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
345
cdef object _apply_delta(char *source, Py_ssize_t source_size,
346
                         char *delta, Py_ssize_t delta_size):
347
    """common functionality between apply_delta and apply_delta_to_source."""
348
    cdef unsigned char *data, *top
349
    cdef unsigned char *dst_buf, *out, cmd
350
    cdef Py_ssize_t size
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
351
    cdef unsigned int cp_off, cp_size
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
352
    cdef int failed
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
353
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
354
    data = <unsigned char *>delta
355
    top = data + delta_size
356
357
    # now the result size
358
    size = get_delta_hdr_size(&data, top)
359
    result = PyString_FromStringAndSize(NULL, size)
360
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
361
4788.2.2 by John Arbash Meinel
Stop holding the gil while extracting data.
362
    failed = 0
363
    with nogil:
364
        out = dst_buf
365
        while (data < top):
366
            cmd = data[0]
367
            data = data + 1
368
            if (cmd & 0x80):
369
                # Copy instruction
370
                data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
371
                if (cp_off + cp_size < cp_size or
372
                    cp_off + cp_size > source_size or
373
                    cp_size > size):
374
                    failed = 1
375
                    break
376
                memcpy(out, source + cp_off, cp_size)
377
                out = out + cp_size
378
                size = size - cp_size
379
            else:
380
                # Insert instruction
381
                if cmd == 0:
382
                    # cmd == 0 is reserved for future encoding
383
                    # extensions. In the mean time we must fail when
384
                    # encountering them (might be data corruption).
385
                    failed = 2
386
                    break
387
                if cmd > size:
388
                    failed = 3
389
                    break
390
                memcpy(out, data, cmd)
391
                out = out + cmd
392
                data = data + cmd
393
                size = size - cmd
394
    if failed:
395
        if failed == 1:
396
            raise ValueError('Something wrong with:'
397
                ' cp_off = %s, cp_size = %s'
398
                ' source_size = %s, size = %s'
399
                % (cp_off, cp_size, source_size, size))
400
        elif failed == 2:
401
            raise ValueError('Got delta opcode: 0, not supported')
402
        elif failed == 3:
403
            raise ValueError('Insert instruction longer than remaining'
404
                ' bytes: %d > %d' % (cmd, size))
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
405
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
406
    # sanity check
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
407
    if (data != top or size != 0):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
408
        raise RuntimeError('Did not extract the number of bytes we expected'
409
            ' we were left with %d bytes in "size", and top - data = %d'
410
            % (size, <int>(top - data)))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
411
        return None
412
413
    # *dst_size = out - dst_buf;
3735.40.20 by John Arbash Meinel
cleanup the apply_delta code a bit.
414
    if (out - dst_buf) != PyString_GET_SIZE(result):
415
        raise RuntimeError('Number of bytes extracted did not match the'
416
            ' size encoded in the delta header.')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
417
    return result
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
418
419
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
420
def apply_delta_to_source(source, delta_start, delta_end):
421
    """Extract a delta from source bytes, and apply it."""
422
    cdef char *c_source
423
    cdef Py_ssize_t c_source_size
424
    cdef char *c_delta
425
    cdef Py_ssize_t c_delta_size
426
    cdef Py_ssize_t c_delta_start, c_delta_end
427
428
    if not PyString_CheckExact(source):
429
        raise TypeError('source is not a str')
430
    c_source_size = PyString_GET_SIZE(source)
431
    c_delta_start = delta_start
432
    c_delta_end = delta_end
433
    if c_delta_start >= c_source_size:
434
        raise ValueError('delta starts after source')
435
    if c_delta_end > c_source_size:
436
        raise ValueError('delta ends after source')
437
    if c_delta_start >= c_delta_end:
438
        raise ValueError('delta starts after it ends')
439
440
    c_delta_size = c_delta_end - c_delta_start
441
    c_source = PyString_AS_STRING(source)
442
    c_delta = c_source + c_delta_start
443
    # We don't use source_size, because we know the delta should not refer to
444
    # any bytes after it starts
445
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
446
447
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
448
def encode_base128_int(val):
449
    """Convert an integer into a 7-bit lsb encoding."""
450
    cdef unsigned int c_val
451
    cdef Py_ssize_t count
452
    cdef unsigned int num_bytes
453
    cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes
454
455
    c_val = val
456
    count = 0
457
    while c_val >= 0x80 and count < 8:
458
        c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
459
        c_val = c_val >> 7
460
        count = count + 1
461
    if count >= 8 or c_val >= 0x80:
462
        raise ValueError('encode_base128_int overflowed the buffer')
463
    c_bytes[count] = <unsigned char>(c_val & 0xFF)
464
    count = count + 1
465
    return PyString_FromStringAndSize(<char *>c_bytes, count)
466
467
468
def decode_base128_int(bytes):
469
    """Decode an integer from a 7-bit lsb encoding."""
470
    cdef int offset
471
    cdef int val
472
    cdef unsigned int uval
473
    cdef int shift
474
    cdef Py_ssize_t num_low_bytes
475
    cdef unsigned char *c_bytes
476
477
    offset = 0
478
    val = 0
479
    shift = 0
480
    if not PyString_CheckExact(bytes):
481
        raise TypeError('bytes is not a string')
482
    c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
483
    # We take off 1, because we have to be able to decode the non-expanded byte
484
    num_low_bytes = PyString_GET_SIZE(bytes) - 1
485
    while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
486
        val = val | ((c_bytes[offset] & 0x7F) << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
487
        shift = shift + 7
488
        offset = offset + 1
489
    if c_bytes[offset] & 0x80:
490
        raise ValueError('Data not properly formatted, we ran out of'
491
                         ' bytes before 0x80 stopped being set.')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
492
    val = val | (c_bytes[offset] << shift)
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
493
    offset = offset + 1
494
    if val < 0:
495
        uval = <unsigned int> val
496
        return uval, offset
497
    return val, offset
498
499