1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
1
# Copyright (C) 2008 Canonical Limited.
3
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
8
7
# This program is distributed in the hope that it will be useful,
9
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
10
# GNU General Public License for more details.
13
12
# You should have received a copy of the GNU General Public License
14
13
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
17
"""Compiled extensions for doing compression."""
20
cdef extern from "python-compat.h":
24
cdef extern from "Python.h":
25
ctypedef struct PyObject:
27
ctypedef int Py_ssize_t # Required for older pyrex versions
28
int PyString_CheckExact(object)
29
char * PyString_AS_STRING(object)
30
Py_ssize_t PyString_GET_SIZE(object)
31
object PyString_FromStringAndSize(char *, Py_ssize_t)
34
19
cdef extern from *:
35
20
ctypedef unsigned long size_t
36
void * malloc(size_t) nogil
37
void * realloc(void *, size_t) nogil
38
void free(void *) nogil
39
void memcpy(void *, void *, size_t) nogil
22
void * realloc(void *, size_t)
24
void memcpy(void *, void *, size_t)
42
26
cdef extern from "delta.h":
43
27
struct source_info:
46
30
unsigned long agg_offset
47
31
struct delta_index:
49
delta_index * create_delta_index(source_info *src, delta_index *old) nogil
33
delta_index * create_delta_index(source_info *src, delta_index *old)
50
34
delta_index * create_delta_index_from_delta(source_info *delta,
51
delta_index *old) nogil
52
void free_delta_index(delta_index *index) nogil
36
void free_delta_index(delta_index *index)
53
37
void *create_delta(delta_index *indexes,
54
38
void *buf, unsigned long bufsize,
55
unsigned long *delta_size, unsigned long max_delta_size) nogil
39
unsigned long *delta_size, unsigned long max_delta_size)
56
40
unsigned long get_delta_hdr_size(unsigned char **datap,
57
unsigned char *top) nogil
58
unsigned long sizeof_delta_index(delta_index *index)
59
42
Py_ssize_t DELTA_SIZE_MIN
43
void *patch_delta(void *src_buf, unsigned long src_size,
44
void *delta_buf, unsigned long delta_size,
45
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
62
54
cdef void *safe_malloc(size_t count) except NULL:
294
248
raise RuntimeError('delta_size %d smaller than min delta size %d'
295
249
% (delta_size, DELTA_SIZE_MIN))
297
return _apply_delta(source, source_size, delta, delta_size)
300
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
301
unsigned char cmd, unsigned int *offset,
302
unsigned int *length) nogil: # cannot_raise
303
"""Decode a copy instruction from the next few bytes.
305
A copy instruction is a variable number of bytes, so we will parse the
306
bytes we care about, and return the new position, as well as the offset and
307
length referred to in the bytes.
309
:param bytes: Pointer to the start of bytes after cmd
310
:param cmd: The command code
311
:return: Pointer to the bytes just after the last decode byte
313
cdef unsigned int off, size, count
321
off = off | (bytes[count] << 8)
324
off = off | (bytes[count] << 16)
327
off = off | (bytes[count] << 24)
333
size = size | (bytes[count] << 8)
336
size = size | (bytes[count] << 16)
345
cdef object _apply_delta(char *source, Py_ssize_t source_size,
346
char *delta, Py_ssize_t delta_size):
347
"""common functionality between apply_delta and apply_delta_to_source."""
348
cdef unsigned char *data, *top
349
cdef unsigned char *dst_buf, *out, cmd
351
cdef unsigned int cp_off, cp_size
354
251
data = <unsigned char *>delta
355
252
top = data + delta_size
254
# make sure the orig file size matches what we expect
255
# XXX: gcc warns because data isn't defined as 'const'
256
size = get_delta_hdr_size(&data, top)
257
if (size > source_size):
258
# XXX: mismatched source size
259
raise RuntimeError('source size %d < expected source size %d'
260
% (source_size, size))
357
263
# now the result size
358
264
size = get_delta_hdr_size(&data, top)
359
265
result = PyString_FromStringAndSize(NULL, size)
360
266
dst_buf = <unsigned char*>PyString_AS_STRING(result)
370
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
371
if (cp_off + cp_size < cp_size or
372
cp_off + cp_size > source_size or
376
memcpy(out, source + cp_off, cp_size)
378
size = size - cp_size
382
# cmd == 0 is reserved for future encoding
383
# extensions. In the mean time we must fail when
384
# encountering them (might be data corruption).
390
memcpy(out, data, cmd)
396
raise ValueError('Something wrong with:'
397
' cp_off = %s, cp_size = %s'
398
' source_size = %s, size = %s'
399
% (cp_off, cp_size, source_size, size))
401
raise ValueError('Got delta opcode: 0, not supported')
403
raise ValueError('Insert instruction longer than remaining'
404
' bytes: %d > %d' % (cmd, size))
267
# XXX: The original code added a trailing null here, but this shouldn't be
268
# necessary when using PyString_FromStringAndSize
281
cp_off = cp_off | (data[0] << 8)
284
cp_off = cp_off | (data[0] << 16)
287
cp_off = cp_off | (data[0] << 24)
293
cp_size = cp_size | (data[0] << 8)
296
cp_size = cp_size | (data[0] << 16)
300
if (cp_off + cp_size < cp_size or
301
cp_off + cp_size > source_size or
303
raise RuntimeError('Something wrong with:'
304
' cp_off = %s, cp_size = %s'
305
' source_size = %s, size = %s'
306
% (cp_off, cp_size, source_size, size))
307
memcpy(out, source + cp_off, cp_size)
309
size = size - cp_size
312
raise RuntimeError('Insert instruction longer than remaining'
313
' bytes: %d > %d' % (cmd, size))
314
memcpy(out, data, cmd)
320
# * cmd == 0 is reserved for future encoding
321
# * extensions. In the mean time we must fail when
322
# * encountering them (might be data corruption).
324
## /* XXX: error("unexpected delta opcode 0"); */
325
raise RuntimeError('Got delta opcode: 0, not supported')
407
328
if (data != top or size != 0):
329
## /* XXX: error("delta replay has gone wild"); */
408
330
raise RuntimeError('Did not extract the number of bytes we expected'
409
331
' we were left with %d bytes in "size", and top - data = %d'
410
332
% (size, <int>(top - data)))
413
335
# *dst_size = out - dst_buf;
414
if (out - dst_buf) != PyString_GET_SIZE(result):
415
raise RuntimeError('Number of bytes extracted did not match the'
416
' size encoded in the delta header.')
336
assert (out - dst_buf) == PyString_GET_SIZE(result)
420
def apply_delta_to_source(source, delta_start, delta_end):
421
"""Extract a delta from source bytes, and apply it.

The delta is stored inside ``source`` itself, in the byte range
[delta_start, delta_end). The bytes before delta_start are used as the
content the delta is applied against.

:param source: An exact str holding both the base bytes and the delta.
:param delta_start: Offset into source where the delta begins.
:param delta_end: Offset into source just past the end of the delta.
:return: Whatever _apply_delta() returns for that base/delta pair.
:raises TypeError: if source is not an exact str.
:raises ValueError: if delta_start is at or past the end of source, if
    delta_end is past the end of source, or if delta_start >= delta_end.
"""
423
cdef Py_ssize_t c_source_size
425
cdef Py_ssize_t c_delta_size
426
cdef Py_ssize_t c_delta_start, c_delta_end
428
# PyString_AS_STRING / PyString_GET_SIZE are used below, so insist on an
# exact str first.
if not PyString_CheckExact(source):
429
raise TypeError('source is not a str')
430
c_source_size = PyString_GET_SIZE(source)
431
c_delta_start = delta_start
432
c_delta_end = delta_end
433
# Bounds checks: the delta must lie within source and must not be empty.
# NOTE(review): only upper bounds are checked here; a negative delta_start
# or delta_end is not rejected by these comparisons -- confirm callers never
# pass negative offsets, since c_delta is computed as c_source + c_delta_start.
if c_delta_start >= c_source_size:
434
raise ValueError('delta starts after source')
435
if c_delta_end > c_source_size:
436
raise ValueError('delta ends after source')
437
if c_delta_start >= c_delta_end:
438
raise ValueError('delta starts after it ends')
440
c_delta_size = c_delta_end - c_delta_start
441
c_source = PyString_AS_STRING(source)
442
# The delta is a window into the same buffer as the base content.
c_delta = c_source + c_delta_start
443
# We don't use source_size, because we know the delta should not refer to
444
# any bytes after it starts
445
return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
448
def encode_base128_int(val):
449
"""Convert an integer into a 7-bit lsb encoding.

Each output byte carries 7 bits of the value. Every byte except the last
has its high bit (0x80) set; the last byte is written with the high bit
clear.

:param val: The integer to encode (presumably assigned to the C unsigned
    int ``c_val`` in lines not visible here -- confirm).
:return: A str built from the first ``count`` bytes of the local buffer.
:raises ValueError: if the value does not fit in the 8-byte buffer.
"""
450
cdef unsigned int c_val
451
cdef Py_ssize_t count
452
# NOTE(review): num_bytes is not referenced in the visible lines of this
# function -- confirm whether it is still needed.
cdef unsigned int num_bytes
453
cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes
457
# Emit continuation bytes while more than 7 bits remain and there is room.
while c_val >= 0x80 and count < 8:
458
# Low 8 bits of the value with the 0x80 continuation flag forced on.
c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
461
# Either the buffer is full or the value still needs a continuation byte:
# the final byte cannot be stored.
if count >= 8 or c_val >= 0x80:
462
raise ValueError('encode_base128_int overflowed the buffer')
463
# Final byte: c_val < 0x80 here, so the high bit is clear.
c_bytes[count] = <unsigned char>(c_val & 0xFF)
465
return PyString_FromStringAndSize(<char *>c_bytes, count)
468
def decode_base128_int(bytes):
469
"""Decode an integer from a 7-bit lsb encoding."""
472
cdef unsigned int uval
474
cdef Py_ssize_t num_low_bytes
475
cdef unsigned char *c_bytes
480
if not PyString_CheckExact(bytes):
481
raise TypeError('bytes is not a string')
482
c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
483
# We take off 1, because the last byte must be left for the terminating
# byte, i.e. the one that does not have the 0x80 continuation bit set
484
num_low_bytes = PyString_GET_SIZE(bytes) - 1
485
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
486
val = val | ((c_bytes[offset] & 0x7F) << shift)
489
if c_bytes[offset] & 0x80:
490
raise ValueError('Data not properly formatted, we ran out of'
491
' bytes before 0x80 stopped being set.')
492
val = val | (c_bytes[offset] << shift)
495
uval = <unsigned int> val