17
17
"""Compiled extensions for doing compression."""
20
cdef extern from "python-compat.h":
24
cdef extern from "Python.h":
25
ctypedef struct PyObject:
27
ctypedef int Py_ssize_t # Required for older pyrex versions
28
int PyString_CheckExact(object)
29
char * PyString_AS_STRING(object)
30
Py_ssize_t PyString_GET_SIZE(object)
31
object PyString_FromStringAndSize(char *, Py_ssize_t)
34
19
cdef extern from *:
35
20
ctypedef unsigned long size_t
36
void * malloc(size_t) nogil
37
void * realloc(void *, size_t) nogil
38
void free(void *) nogil
39
void memcpy(void *, void *, size_t) nogil
22
void * realloc(void *, size_t)
24
void memcpy(void *, void *, size_t)
42
26
cdef extern from "delta.h":
43
27
struct source_info:
46
30
unsigned long agg_offset
47
31
struct delta_index:
49
delta_index * create_delta_index(source_info *src, delta_index *old) nogil
33
delta_index * create_delta_index(source_info *src, delta_index *old)
50
34
delta_index * create_delta_index_from_delta(source_info *delta,
51
delta_index *old) nogil
52
void free_delta_index(delta_index *index) nogil
36
void free_delta_index(delta_index *index)
53
37
void *create_delta(delta_index *indexes,
54
38
void *buf, unsigned long bufsize,
55
unsigned long *delta_size, unsigned long max_delta_size) nogil
39
unsigned long *delta_size, unsigned long max_delta_size)
56
40
unsigned long get_delta_hdr_size(unsigned char **datap,
57
unsigned char *top) nogil
58
unsigned long sizeof_delta_index(delta_index *index)
59
42
Py_ssize_t DELTA_SIZE_MIN
43
void *patch_delta(void *src_buf, unsigned long src_size,
44
void *delta_buf, unsigned long delta_size,
45
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
62
54
cdef void *safe_malloc(size_t count) except NULL:
108
100
if source is not None:
109
101
self.add_source(source, 0)
111
def __sizeof__(self):
112
# We want to track the _source_infos allocations, but the referenced
113
# void* are actually tracked in _sources itself.
114
# XXX: Cython is capable of doing sizeof(class) and returning the size
115
# of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
116
# to do it manually. *sigh* Note that we might get it wrong
117
# because of alignment issues.
119
# PyObject start, vtable *, 3 object pointers, 1 unsigned long, 1 unsigned int
120
size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
121
+ sizeof(unsigned long)
122
+ sizeof(unsigned int))
123
+ (sizeof(source_info) * self._max_num_sources)
124
+ sizeof_delta_index(self._index))
127
103
def __repr__(self):
128
104
return '%s(%d, %d)' % (self.__class__.__name__,
129
105
len(self._sources), self._source_offset)
202
171
src.size = c_source_size
204
173
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
205
175
self._source_offset = src.agg_offset + src.size
206
# We delay creating the index on the first insert
207
if source_location != 0:
209
index = create_delta_index(src, self._index)
211
free_delta_index(self._index)
214
cdef _populate_first_index(self):
215
cdef delta_index *index
216
if len(self._sources) != 1 or self._index != NULL:
217
raise AssertionError('_populate_first_index should only be'
218
' called when we have a single source and no index yet')
220
# We know that self._index is already NULL, so whatever
221
# create_delta_index returns is fine
223
self._index = create_delta_index(&self._source_infos[0], NULL)
224
assert self._index != NULL
177
free_delta_index(self._index)
226
180
cdef _expand_sources(self):
227
181
raise RuntimeError('if we move self._source_infos, then we need to'
254
204
# TODO: inline some of create_delta so we at least don't have to double
255
205
# malloc, and can instead use PyString_FromStringAndSize, to
256
206
# allocate the bytes into the final string
257
c_max_delta_size = max_delta_size
259
delta = create_delta(self._index,
261
&delta_size, c_max_delta_size)
207
delta = create_delta(self._index,
209
&delta_size, max_delta_size)
264
212
result = PyString_FromStringAndSize(<char *>delta, delta_size)
297
245
return _apply_delta(source, source_size, delta, delta_size)
300
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
301
unsigned char cmd, unsigned int *offset,
302
unsigned int *length) nogil: # cannot_raise
303
"""Decode a copy instruction from the next few bytes.
305
A copy instruction is a variable number of bytes, so we will parse the
306
bytes we care about, and return the new position, as well as the offset and
307
length referred to in the bytes.
309
:param bytes: Pointer to the start of bytes after cmd
310
:param cmd: The command code
311
:return: Pointer to the bytes just after the last decoded byte
313
cdef unsigned int off, size, count
321
off = off | (bytes[count] << 8)
324
off = off | (bytes[count] << 16)
327
off = off | (bytes[count] << 24)
333
size = size | (bytes[count] << 8)
336
size = size | (bytes[count] << 16)
345
248
cdef object _apply_delta(char *source, Py_ssize_t source_size,
346
249
char *delta, Py_ssize_t delta_size):
347
250
"""common functionality between apply_delta and apply_delta_to_source."""
348
251
cdef unsigned char *data, *top
349
252
cdef unsigned char *dst_buf, *out, cmd
350
253
cdef Py_ssize_t size
351
cdef unsigned int cp_off, cp_size
254
cdef unsigned long cp_off, cp_size
354
256
data = <unsigned char *>delta
355
257
top = data + delta_size
358
260
size = get_delta_hdr_size(&data, top)
359
261
result = PyString_FromStringAndSize(NULL, size)
360
262
dst_buf = <unsigned char*>PyString_AS_STRING(result)
370
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
371
if (cp_off + cp_size < cp_size or
372
cp_off + cp_size > source_size or
376
memcpy(out, source + cp_off, cp_size)
378
size = size - cp_size
382
# cmd == 0 is reserved for future encoding
383
# extensions. In the meantime we must fail when
384
# encountering them (might be data corruption).
390
memcpy(out, data, cmd)
396
raise ValueError('Something wrong with:'
397
' cp_off = %s, cp_size = %s'
398
' source_size = %s, size = %s'
399
% (cp_off, cp_size, source_size, size))
401
raise ValueError('Got delta opcode: 0, not supported')
403
raise ValueError('Insert instruction longer than remaining'
404
' bytes: %d > %d' % (cmd, size))
263
# XXX: The original code added a trailing null here, but this shouldn't be
264
# necessary when using PyString_FromStringAndSize
277
cp_off = cp_off | (data[0] << 8)
280
cp_off = cp_off | (data[0] << 16)
283
cp_off = cp_off | (data[0] << 24)
289
cp_size = cp_size | (data[0] << 8)
292
cp_size = cp_size | (data[0] << 16)
296
if (cp_off + cp_size < cp_size or
297
cp_off + cp_size > source_size or
299
raise RuntimeError('Something wrong with:'
300
' cp_off = %s, cp_size = %s'
301
' source_size = %s, size = %s'
302
% (cp_off, cp_size, source_size, size))
303
memcpy(out, source + cp_off, cp_size)
305
size = size - cp_size
308
raise RuntimeError('Insert instruction longer than remaining'
309
' bytes: %d > %d' % (cmd, size))
310
memcpy(out, data, cmd)
316
# * cmd == 0 is reserved for future encoding
317
# * extensions. In the meantime we must fail when
318
# * encountering them (might be data corruption).
320
## /* XXX: error("unexpected delta opcode 0"); */
321
raise RuntimeError('Got delta opcode: 0, not supported')
407
324
if (data != top or size != 0):
325
## /* XXX: error("delta replay has gone wild"); */
408
326
raise RuntimeError('Did not extract the number of bytes we expected'
409
327
' we were left with %d bytes in "size", and top - data = %d'
410
328
% (size, <int>(top - data)))
413
331
# *dst_size = out - dst_buf;
414
if (out - dst_buf) != PyString_GET_SIZE(result):
415
raise RuntimeError('Number of bytes extracted did not match the'
416
' size encoded in the delta header.')
332
assert (out - dst_buf) == PyString_GET_SIZE(result)
483
399
# We take off 1, because we have to be able to decode the non-expanded byte
484
400
num_low_bytes = PyString_GET_SIZE(bytes) - 1
485
401
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
486
val = val | ((c_bytes[offset] & 0x7F) << shift)
402
val |= (c_bytes[offset] & 0x7F) << shift
487
403
shift = shift + 7
488
404
offset = offset + 1
489
405
if c_bytes[offset] & 0x80:
490
406
raise ValueError('Data not properly formatted, we ran out of'
491
407
' bytes before 0x80 stopped being set.')
492
val = val | (c_bytes[offset] << shift)
408
val |= c_bytes[offset] << shift
493
409
offset = offset + 1
495
411
uval = <unsigned int> val