17
17
"""Compiled extensions for doing compression."""
20
cdef extern from "python-compat.h":
24
cdef extern from "Python.h":
25
ctypedef struct PyObject:
27
ctypedef int Py_ssize_t # Required for older pyrex versions
28
int PyString_CheckExact(object)
29
char * PyString_AS_STRING(object)
30
Py_ssize_t PyString_GET_SIZE(object)
31
object PyString_FromStringAndSize(char *, Py_ssize_t)
19
34
cdef extern from *:
20
35
ctypedef unsigned long size_t
22
void * realloc(void *, size_t)
24
void memcpy(void *, void *, size_t)
36
void * malloc(size_t) nogil
37
void * realloc(void *, size_t) nogil
38
void free(void *) nogil
39
void memcpy(void *, void *, size_t) nogil
26
42
cdef extern from "delta.h":
27
43
struct source_info:
30
46
unsigned long agg_offset
31
47
struct delta_index:
33
delta_index * create_delta_index(source_info *src, delta_index *old)
49
delta_index * create_delta_index(source_info *src, delta_index *old) nogil
34
50
delta_index * create_delta_index_from_delta(source_info *delta,
36
void free_delta_index(delta_index *index)
51
delta_index *old) nogil
52
void free_delta_index(delta_index *index) nogil
37
53
void *create_delta(delta_index *indexes,
38
54
void *buf, unsigned long bufsize,
39
unsigned long *delta_size, unsigned long max_delta_size)
55
unsigned long *delta_size, unsigned long max_delta_size) nogil
40
56
unsigned long get_delta_hdr_size(unsigned char **datap,
57
unsigned char *top) nogil
58
unsigned long sizeof_delta_index(delta_index *index)
42
59
Py_ssize_t DELTA_SIZE_MIN
43
void *patch_delta(void *src_buf, unsigned long src_size,
44
void *delta_buf, unsigned long delta_size,
45
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
54
62
cdef void *safe_malloc(size_t count) except NULL:
100
108
if source is not None:
101
109
self.add_source(source, 0)
111
def __sizeof__(self):
112
# We want to track the _source_infos allocations, but the referenced
113
# void* are actually tracked in _sources itself.
114
# XXX: Cython is capable of doing sizeof(class) and returning the size
115
# of the underlying struct. Pyrex (<= 0.9.9) refuses, so we need
116
# to do it manually. *sigh* Note that we might get it wrong
117
# because of alignment issues.
119
# PyObject start, vtable *, 3 object pointers, 2 C ints
120
size = ((sizeof(PyObject) + sizeof(void*) + 3*sizeof(PyObject*)
121
+ sizeof(unsigned long)
122
+ sizeof(unsigned int))
123
+ (sizeof(source_info) * self._max_num_sources)
124
+ sizeof_delta_index(self._index))
103
127
def __repr__(self):
    """Return a short debug string for this object.

    The string has the form ``ClassName(N, OFFSET)``, where N is
    len(self._sources) and OFFSET is self._source_offset.
    """
    cls_name = self.__class__.__name__
    num_sources = len(self._sources)
    return '%s(%d, %d)' % (cls_name, num_sources, self._source_offset)
171
202
src.size = c_source_size
173
204
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
175
205
self._source_offset = src.agg_offset + src.size
177
free_delta_index(self._index)
206
# We delay creating the index on the first insert
207
if source_location != 0:
209
index = create_delta_index(src, self._index)
211
free_delta_index(self._index)
214
cdef _populate_first_index(self):
215
cdef delta_index *index
216
if len(self._sources) != 1 or self._index != NULL:
217
raise AssertionError('_populate_first_index should only be'
218
' called when we have a single source and no index yet')
220
# We know that self._index is already NULL, so whatever
221
# create_delta_index returns is fine
223
self._index = create_delta_index(&self._source_infos[0], NULL)
224
assert self._index != NULL
180
226
cdef _expand_sources(self):
181
227
raise RuntimeError('if we move self._source_infos, then we need to'
204
254
# TODO: inline some of create_delta so we at least don't have to double
205
255
# malloc, and can instead use PyString_FromStringAndSize, to
206
256
# allocate the bytes into the final string
207
delta = create_delta(self._index,
209
&delta_size, max_delta_size)
257
c_max_delta_size = max_delta_size
259
delta = create_delta(self._index,
261
&delta_size, c_max_delta_size)
212
264
result = PyString_FromStringAndSize(<char *>delta, delta_size)
245
297
return _apply_delta(source, source_size, delta, delta_size)
300
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
301
unsigned char cmd, unsigned int *offset,
302
unsigned int *length) nogil: # cannot_raise
303
"""Decode a copy instruction from the next few bytes.
305
A copy instruction is a variable number of bytes, so we will parse the
306
bytes we care about, and return the new position, as well as the offset and
307
length referred to in the bytes.
309
:param bytes: Pointer to the start of bytes after cmd
310
:param cmd: The command code
311
:return: Pointer to the bytes just after the last decoded byte
313
cdef unsigned int off, size, count
321
off = off | (bytes[count] << 8)
324
off = off | (bytes[count] << 16)
327
off = off | (bytes[count] << 24)
333
size = size | (bytes[count] << 8)
336
size = size | (bytes[count] << 16)
248
345
cdef object _apply_delta(char *source, Py_ssize_t source_size,
249
346
char *delta, Py_ssize_t delta_size):
250
347
"""common functionality between apply_delta and apply_delta_to_source."""
251
348
cdef unsigned char *data, *top
252
349
cdef unsigned char *dst_buf, *out, cmd
253
350
cdef Py_ssize_t size
254
cdef unsigned long cp_off, cp_size
351
cdef unsigned int cp_off, cp_size
256
354
data = <unsigned char *>delta
257
355
top = data + delta_size
260
358
size = get_delta_hdr_size(&data, top)
261
359
result = PyString_FromStringAndSize(NULL, size)
262
360
dst_buf = <unsigned char*>PyString_AS_STRING(result)
263
# XXX: The original code added a trailing null here, but this shouldn't be
264
# necessary when using PyString_FromStringAndSize
277
cp_off = cp_off | (data[0] << 8)
280
cp_off = cp_off | (data[0] << 16)
283
cp_off = cp_off | (data[0] << 24)
289
cp_size = cp_size | (data[0] << 8)
292
cp_size = cp_size | (data[0] << 16)
296
if (cp_off + cp_size < cp_size or
297
cp_off + cp_size > source_size or
299
raise RuntimeError('Something wrong with:'
300
' cp_off = %s, cp_size = %s'
301
' source_size = %s, size = %s'
302
% (cp_off, cp_size, source_size, size))
303
memcpy(out, source + cp_off, cp_size)
305
size = size - cp_size
308
raise RuntimeError('Insert instruction longer than remaining'
309
' bytes: %d > %d' % (cmd, size))
310
memcpy(out, data, cmd)
316
# * cmd == 0 is reserved for future encoding
317
# * extensions. In the mean time we must fail when
318
# * encountering them (might be data corruption).
320
## /* XXX: error("unexpected delta opcode 0"); */
321
raise RuntimeError('Got delta opcode: 0, not supported')
370
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
371
if (cp_off + cp_size < cp_size or
372
cp_off + cp_size > source_size or
376
memcpy(out, source + cp_off, cp_size)
378
size = size - cp_size
382
# cmd == 0 is reserved for future encoding
383
# extensions. In the mean time we must fail when
384
# encountering them (might be data corruption).
390
memcpy(out, data, cmd)
396
raise ValueError('Something wrong with:'
397
' cp_off = %s, cp_size = %s'
398
' source_size = %s, size = %s'
399
% (cp_off, cp_size, source_size, size))
401
raise ValueError('Got delta opcode: 0, not supported')
403
raise ValueError('Insert instruction longer than remaining'
404
' bytes: %d > %d' % (cmd, size))
324
407
if (data != top or size != 0):
325
## /* XXX: error("delta replay has gone wild"); */
326
408
raise RuntimeError('Did not extract the number of bytes we expected'
327
409
' we were left with %d bytes in "size", and top - data = %d'
328
410
% (size, <int>(top - data)))
331
413
# *dst_size = out - dst_buf;
332
assert (out - dst_buf) == PyString_GET_SIZE(result)
414
if (out - dst_buf) != PyString_GET_SIZE(result):
415
raise RuntimeError('Number of bytes extracted did not match the'
416
' size encoded in the delta header.')
399
483
# We take off 1, because we have to be able to decode the non-expanded byte
400
484
num_low_bytes = PyString_GET_SIZE(bytes) - 1
401
485
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
402
val |= (c_bytes[offset] & 0x7F) << shift
486
val = val | ((c_bytes[offset] & 0x7F) << shift)
403
487
shift = shift + 7
404
488
offset = offset + 1
405
489
if c_bytes[offset] & 0x80:
406
490
raise ValueError('Data not properly formatted, we ran out of'
407
491
' bytes before 0x80 stopped being set.')
408
val |= c_bytes[offset] << shift
492
val = val | (c_bytes[offset] << shift)
409
493
offset = offset + 1
411
495
uval = <unsigned int> val