17
17
"""Compiled extensions for doing compression."""
20
cdef extern from "python-compat.h":
24
cdef extern from "Python.h":
25
ctypedef int Py_ssize_t # Required for older pyrex versions
26
int PyString_CheckExact(object)
27
char * PyString_AS_STRING(object)
28
Py_ssize_t PyString_GET_SIZE(object)
29
object PyString_FromStringAndSize(char *, Py_ssize_t)
19
32
cdef extern from *:
20
33
ctypedef unsigned long size_t
22
void * realloc(void *, size_t)
24
void memcpy(void *, void *, size_t)
34
void * malloc(size_t) nogil
35
void * realloc(void *, size_t) nogil
36
void free(void *) nogil
37
void memcpy(void *, void *, size_t) nogil
26
40
cdef extern from "delta.h":
27
41
struct source_info:
30
44
unsigned long agg_offset
31
45
struct delta_index:
33
delta_index * create_delta_index(source_info *src, delta_index *old)
47
delta_index * create_delta_index(source_info *src, delta_index *old) nogil
34
48
delta_index * create_delta_index_from_delta(source_info *delta,
36
void free_delta_index(delta_index *index)
49
delta_index *old) nogil
50
void free_delta_index(delta_index *index) nogil
37
51
void *create_delta(delta_index *indexes,
38
52
void *buf, unsigned long bufsize,
39
unsigned long *delta_size, unsigned long max_delta_size)
53
unsigned long *delta_size, unsigned long max_delta_size) nogil
40
54
unsigned long get_delta_hdr_size(unsigned char **datap,
55
unsigned char *top) nogil
42
56
Py_ssize_t DELTA_SIZE_MIN
43
void *patch_delta(void *src_buf, unsigned long src_size,
44
void *delta_buf, unsigned long delta_size,
45
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
54
59
cdef void *safe_malloc(size_t count) except NULL:
171
183
src.size = c_source_size
173
185
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
175
186
self._source_offset = src.agg_offset + src.size
177
free_delta_index(self._index)
187
# We delay creating the index on the first insert
188
if source_location != 0:
190
index = create_delta_index(src, self._index)
192
free_delta_index(self._index)
195
cdef _populate_first_index(self):
196
cdef delta_index *index
197
if len(self._sources) != 1 or self._index != NULL:
198
raise AssertionError('_populate_first_index should only be'
199
' called when we have a single source and no index yet')
201
# We know that self._index is already NULL, so whatever
202
# create_delta_index returns is fine
204
self._index = create_delta_index(&self._source_infos[0], NULL)
205
assert self._index != NULL
180
207
cdef _expand_sources(self):
181
208
raise RuntimeError('if we move self._source_infos, then we need to'
204
235
# TODO: inline some of create_delta so we at least don't have to double
205
236
# malloc, and can instead use PyString_FromStringAndSize, to
206
237
# allocate the bytes into the final string
207
delta = create_delta(self._index,
209
&delta_size, max_delta_size)
238
c_max_delta_size = max_delta_size
240
delta = create_delta(self._index,
242
&delta_size, c_max_delta_size)
212
245
result = PyString_FromStringAndSize(<char *>delta, delta_size)
245
278
return _apply_delta(source, source_size, delta, delta_size)
281
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
282
unsigned char cmd, unsigned int *offset,
283
unsigned int *length) nogil: # cannot_raise
284
"""Decode a copy instruction from the next few bytes.
286
A copy instruction is a variable number of bytes, so we will parse the
287
bytes we care about, and return the new position, as well as the offset and
288
length referred to in the bytes.
290
:param bytes: Pointer to the start of bytes after cmd
291
:param cmd: The command code
292
:return: Pointer to the bytes just after the last decode byte
294
cdef unsigned int off, size, count
302
off = off | (bytes[count] << 8)
305
off = off | (bytes[count] << 16)
308
off = off | (bytes[count] << 24)
314
size = size | (bytes[count] << 8)
317
size = size | (bytes[count] << 16)
248
326
cdef object _apply_delta(char *source, Py_ssize_t source_size,
249
327
char *delta, Py_ssize_t delta_size):
250
328
"""common functionality between apply_delta and apply_delta_to_source."""
251
329
cdef unsigned char *data, *top
252
330
cdef unsigned char *dst_buf, *out, cmd
253
331
cdef Py_ssize_t size
254
cdef unsigned long cp_off, cp_size
332
cdef unsigned int cp_off, cp_size
256
335
data = <unsigned char *>delta
257
336
top = data + delta_size
260
339
size = get_delta_hdr_size(&data, top)
261
340
result = PyString_FromStringAndSize(NULL, size)
262
341
dst_buf = <unsigned char*>PyString_AS_STRING(result)
263
# XXX: The original code added a trailing null here, but this shouldn't be
264
# necessary when using PyString_FromStringAndSize
277
cp_off = cp_off | (data[0] << 8)
280
cp_off = cp_off | (data[0] << 16)
283
cp_off = cp_off | (data[0] << 24)
289
cp_size = cp_size | (data[0] << 8)
292
cp_size = cp_size | (data[0] << 16)
296
if (cp_off + cp_size < cp_size or
297
cp_off + cp_size > source_size or
299
raise RuntimeError('Something wrong with:'
300
' cp_off = %s, cp_size = %s'
301
' source_size = %s, size = %s'
302
% (cp_off, cp_size, source_size, size))
303
memcpy(out, source + cp_off, cp_size)
305
size = size - cp_size
308
raise RuntimeError('Insert instruction longer than remaining'
309
' bytes: %d > %d' % (cmd, size))
310
memcpy(out, data, cmd)
316
# * cmd == 0 is reserved for future encoding
317
# * extensions. In the mean time we must fail when
318
# * encountering them (might be data corruption).
320
## /* XXX: error("unexpected delta opcode 0"); */
321
raise RuntimeError('Got delta opcode: 0, not supported')
351
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
352
if (cp_off + cp_size < cp_size or
353
cp_off + cp_size > source_size or
357
memcpy(out, source + cp_off, cp_size)
359
size = size - cp_size
363
# cmd == 0 is reserved for future encoding
364
# extensions. In the mean time we must fail when
365
# encountering them (might be data corruption).
371
memcpy(out, data, cmd)
377
raise ValueError('Something wrong with:'
378
' cp_off = %s, cp_size = %s'
379
' source_size = %s, size = %s'
380
% (cp_off, cp_size, source_size, size))
382
raise ValueError('Got delta opcode: 0, not supported')
384
raise ValueError('Insert instruction longer than remaining'
385
' bytes: %d > %d' % (cmd, size))
324
388
if (data != top or size != 0):
325
## /* XXX: error("delta replay has gone wild"); */
326
389
raise RuntimeError('Did not extract the number of bytes we expected'
327
390
' we were left with %d bytes in "size", and top - data = %d'
328
391
% (size, <int>(top - data)))
331
394
# *dst_size = out - dst_buf;
332
assert (out - dst_buf) == PyString_GET_SIZE(result)
395
if (out - dst_buf) != PyString_GET_SIZE(result):
396
raise RuntimeError('Number of bytes extracted did not match the'
397
' size encoded in the delta header.')
399
464
# We take off 1, because we have to be able to decode the non-expanded byte
400
465
num_low_bytes = PyString_GET_SIZE(bytes) - 1
401
466
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
402
val |= (c_bytes[offset] & 0x7F) << shift
467
val = val | ((c_bytes[offset] & 0x7F) << shift)
403
468
shift = shift + 7
404
469
offset = offset + 1
405
470
if c_bytes[offset] & 0x80:
406
471
raise ValueError('Data not properly formatted, we ran out of'
407
472
' bytes before 0x80 stopped being set.')
408
val |= c_bytes[offset] << shift
473
val = val | (c_bytes[offset] << shift)
409
474
offset = offset + 1
411
476
uval = <unsigned int> val