17
17
"""Compiled extensions for doing compression."""
20
cdef extern from "python-compat.h":
24
cdef extern from "Python.h":
25
ctypedef int Py_ssize_t # Required for older pyrex versions
26
int PyString_CheckExact(object)
27
char * PyString_AS_STRING(object)
28
Py_ssize_t PyString_GET_SIZE(object)
29
object PyString_FromStringAndSize(char *, Py_ssize_t)
32
19
cdef extern from *:
33
20
ctypedef unsigned long size_t
34
void * malloc(size_t) nogil
35
void * realloc(void *, size_t) nogil
36
void free(void *) nogil
37
void memcpy(void *, void *, size_t) nogil
22
void * realloc(void *, size_t)
24
void memcpy(void *, void *, size_t)
40
26
cdef extern from "delta.h":
41
27
struct source_info:
44
30
unsigned long agg_offset
45
31
struct delta_index:
47
delta_index * create_delta_index(source_info *src, delta_index *old) nogil
33
delta_index * create_delta_index(source_info *src, delta_index *old)
48
34
delta_index * create_delta_index_from_delta(source_info *delta,
49
delta_index *old) nogil
50
void free_delta_index(delta_index *index) nogil
36
void free_delta_index(delta_index *index)
51
37
void *create_delta(delta_index *indexes,
52
38
void *buf, unsigned long bufsize,
53
unsigned long *delta_size, unsigned long max_delta_size) nogil
39
unsigned long *delta_size, unsigned long max_delta_size)
54
40
unsigned long get_delta_hdr_size(unsigned char **datap,
55
unsigned char *top) nogil
56
42
Py_ssize_t DELTA_SIZE_MIN
43
void *patch_delta(void *src_buf, unsigned long src_size,
44
void *delta_buf, unsigned long delta_size,
45
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
59
54
cdef void *safe_malloc(size_t count) except NULL:
183
171
src.size = c_source_size
185
173
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
186
175
self._source_offset = src.agg_offset + src.size
187
# We delay creating the index on the first insert
188
if source_location != 0:
190
index = create_delta_index(src, self._index)
192
free_delta_index(self._index)
195
cdef _populate_first_index(self):
196
cdef delta_index *index
197
if len(self._sources) != 1 or self._index != NULL:
198
raise AssertionError('_populate_first_index should only be'
199
' called when we have a single source and no index yet')
201
# We know that self._index is already NULL, so whatever
202
# create_delta_index returns is fine
204
self._index = create_delta_index(&self._source_infos[0], NULL)
205
assert self._index != NULL
177
free_delta_index(self._index)
207
180
cdef _expand_sources(self):
208
181
raise RuntimeError('if we move self._source_infos, then we need to'
235
204
# TODO: inline some of create_delta so we at least don't have to double
236
205
# malloc, and can instead use PyString_FromStringAndSize, to
237
206
# allocate the bytes into the final string
238
c_max_delta_size = max_delta_size
240
delta = create_delta(self._index,
242
&delta_size, c_max_delta_size)
207
delta = create_delta(self._index,
209
&delta_size, max_delta_size)
245
212
result = PyString_FromStringAndSize(<char *>delta, delta_size)
278
245
return _apply_delta(source, source_size, delta, delta_size)
281
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
282
unsigned char cmd, unsigned int *offset,
283
unsigned int *length) nogil: # cannot_raise
284
"""Decode a copy instruction from the next few bytes.
286
A copy instruction is a variable number of bytes, so we will parse the
287
bytes we care about, and return the new position, as well as the offset and
288
length referred to in the bytes.
290
:param bytes: Pointer to the start of bytes after cmd
291
:param cmd: The command code
292
:return: Pointer to the bytes just after the last decode byte
294
cdef unsigned int off, size, count
302
off = off | (bytes[count] << 8)
305
off = off | (bytes[count] << 16)
308
off = off | (bytes[count] << 24)
314
size = size | (bytes[count] << 8)
317
size = size | (bytes[count] << 16)
326
248
cdef object _apply_delta(char *source, Py_ssize_t source_size,
327
249
char *delta, Py_ssize_t delta_size):
328
250
"""common functionality between apply_delta and apply_delta_to_source."""
329
251
cdef unsigned char *data, *top
330
252
cdef unsigned char *dst_buf, *out, cmd
331
253
cdef Py_ssize_t size
332
cdef unsigned int cp_off, cp_size
254
cdef unsigned long cp_off, cp_size
335
256
data = <unsigned char *>delta
336
257
top = data + delta_size
339
260
size = get_delta_hdr_size(&data, top)
340
261
result = PyString_FromStringAndSize(NULL, size)
341
262
dst_buf = <unsigned char*>PyString_AS_STRING(result)
351
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
352
if (cp_off + cp_size < cp_size or
353
cp_off + cp_size > source_size or
357
memcpy(out, source + cp_off, cp_size)
359
size = size - cp_size
363
# cmd == 0 is reserved for future encoding
364
# extensions. In the mean time we must fail when
365
# encountering them (might be data corruption).
371
memcpy(out, data, cmd)
377
raise ValueError('Something wrong with:'
378
' cp_off = %s, cp_size = %s'
379
' source_size = %s, size = %s'
380
% (cp_off, cp_size, source_size, size))
382
raise ValueError('Got delta opcode: 0, not supported')
384
raise ValueError('Insert instruction longer than remaining'
385
' bytes: %d > %d' % (cmd, size))
263
# XXX: The original code added a trailing null here, but this shouldn't be
264
# necessary when using PyString_FromStringAndSize
277
cp_off = cp_off | (data[0] << 8)
280
cp_off = cp_off | (data[0] << 16)
283
cp_off = cp_off | (data[0] << 24)
289
cp_size = cp_size | (data[0] << 8)
292
cp_size = cp_size | (data[0] << 16)
296
if (cp_off + cp_size < cp_size or
297
cp_off + cp_size > source_size or
299
raise RuntimeError('Something wrong with:'
300
' cp_off = %s, cp_size = %s'
301
' source_size = %s, size = %s'
302
% (cp_off, cp_size, source_size, size))
303
memcpy(out, source + cp_off, cp_size)
305
size = size - cp_size
308
raise RuntimeError('Insert instruction longer than remaining'
309
' bytes: %d > %d' % (cmd, size))
310
memcpy(out, data, cmd)
316
# * cmd == 0 is reserved for future encoding
317
# * extensions. In the mean time we must fail when
318
# * encountering them (might be data corruption).
320
## /* XXX: error("unexpected delta opcode 0"); */
321
raise RuntimeError('Got delta opcode: 0, not supported')
388
324
if (data != top or size != 0):
325
## /* XXX: error("delta replay has gone wild"); */
389
326
raise RuntimeError('Did not extract the number of bytes we expected'
390
327
' we were left with %d bytes in "size", and top - data = %d'
391
328
% (size, <int>(top - data)))
394
331
# *dst_size = out - dst_buf;
395
if (out - dst_buf) != PyString_GET_SIZE(result):
396
raise RuntimeError('Number of bytes extracted did not match the'
397
' size encoded in the delta header.')
332
assert (out - dst_buf) == PyString_GET_SIZE(result)
464
399
# We take off 1, because we have to be able to decode the non-expanded byte
465
400
num_low_bytes = PyString_GET_SIZE(bytes) - 1
466
401
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
467
val = val | ((c_bytes[offset] & 0x7F) << shift)
402
val |= (c_bytes[offset] & 0x7F) << shift
468
403
shift = shift + 7
469
404
offset = offset + 1
470
405
if c_bytes[offset] & 0x80:
471
406
raise ValueError('Data not properly formatted, we ran out of'
472
407
' bytes before 0x80 stopped being set.')
473
val = val | (c_bytes[offset] << shift)
408
val |= c_bytes[offset] << shift
474
409
offset = offset + 1
476
411
uval = <unsigned int> val