226
226
cdef Py_ssize_t source_size
228
228
cdef Py_ssize_t delta_size
229
cdef unsigned char *data, *top
230
cdef unsigned char *dst_buf, *out, cmd
232
cdef unsigned long cp_off, cp_size
234
230
if not PyString_CheckExact(source_bytes):
235
231
raise TypeError('source is not a str')
236
232
if not PyString_CheckExact(delta_bytes):
237
233
raise TypeError('delta is not a str')
239
234
source = PyString_AS_STRING(source_bytes)
240
235
source_size = PyString_GET_SIZE(source_bytes)
241
236
delta = PyString_AS_STRING(delta_bytes)
242
237
delta_size = PyString_GET_SIZE(delta_bytes)
244
238
# Code taken from patch-delta.c, only brought here to give better error
245
239
# handling, and to avoid double allocating memory
246
240
if (delta_size < DELTA_SIZE_MIN):
248
242
raise RuntimeError('delta_size %d smaller than min delta size %d'
249
243
% (delta_size, DELTA_SIZE_MIN))
245
return _apply_delta(source, source_size, delta, delta_size)
248
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
    unsigned char cmd, unsigned int *offset, unsigned int *length):
    """Decode a copy instruction from the next few bytes.

    A copy instruction is a variable number of bytes, so we will parse the
    bytes we care about, and return the new position, as well as the offset and
    length referred to in the bytes.

    The low seven bits of ``cmd`` say which operand bytes are present: bits
    0x01/0x02/0x04/0x08 select bytes 0..3 of the offset and bits
    0x10/0x20/0x40 select bytes 0..2 of the length (little-endian). Operand
    bytes whose flag bit is clear are absent from the stream and count as zero.

    :param bytes: Pointer to the start of bytes after cmd
    :param cmd: The command code
    :param offset: Out-parameter, receives the decoded copy offset
    :param length: Out-parameter, receives the decoded copy length
    :return: Pointer to the bytes just after the last decode byte
    """
    cdef unsigned int off, size, count

    off = 0
    size = 0
    count = 0
    # Each byte is widened to unsigned int before shifting so that a high bit
    # moved into position 31 never overflows a signed int.
    if cmd & 0x01:
        off = bytes[count]
        count = count + 1
    if cmd & 0x02:
        off = off | ((<unsigned int>bytes[count]) << 8)
        count = count + 1
    if cmd & 0x04:
        off = off | ((<unsigned int>bytes[count]) << 16)
        count = count + 1
    if cmd & 0x08:
        off = off | ((<unsigned int>bytes[count]) << 24)
        count = count + 1
    if cmd & 0x10:
        size = bytes[count]
        count = count + 1
    if cmd & 0x20:
        size = size | ((<unsigned int>bytes[count]) << 8)
        count = count + 1
    if cmd & 0x40:
        size = size | ((<unsigned int>bytes[count]) << 16)
        count = count + 1
    if size == 0:
        # In the git delta format an encoded length of zero means 64kB.
        size = 0x10000
    offset[0] = off
    length[0] = size
    return bytes + count
292
# Replay the instructions in `delta` against `source`, building the result in
# a string allocated up front from the size stored in the delta header.
#
# NOTE(review): this span looks like residue of a diff view. The bare numbers
# are old/new line numbers, and statements from two revisions appear to be
# interleaved (e.g. the inline cp_off/cp_size shifts next to the
# _decode_copy_instruction call). Confirm statement order against the real
# file before relying on it.
cdef object _apply_delta(char *source, Py_ssize_t source_size,
293
char *delta, Py_ssize_t delta_size):
294
"""common functionality between apply_delta and apply_delta_to_source."""
295
cdef unsigned char *data, *top
296
cdef unsigned char *dst_buf, *out, cmd
298
cdef unsigned int cp_off, cp_size
251
300
# Walk the delta as unsigned bytes; `top` is one past its final byte and
# bounds the instruction loop below.
data = <unsigned char *>delta
252
301
top = data + delta_size
254
# make sure the orig file size matches what we expect
255
# XXX: gcc warns because data isn't defined as 'const'
256
# get_delta_hdr_size receives &data, so it presumably advances `data` past the
# header field it decodes -- TODO confirm against its definition.
size = get_delta_hdr_size(&data, top)
257
if (size > source_size):
258
# XXX: mismatched source size
259
raise RuntimeError('source size %d < expected source size %d'
260
% (source_size, size))
263
303
# now the result size
264
304
size = get_delta_hdr_size(&data, top)
265
305
# Passing NULL allocates a string of `size` bytes without copying any data in;
# dst_buf then points at that storage so it can be filled in place.
result = PyString_FromStringAndSize(NULL, size)
266
306
dst_buf = <unsigned char*>PyString_AS_STRING(result)
267
# XXX: The original code added a trailing null here, but this shouldn't be
268
# necessary when using PyString_FromStringAndSize
272
309
while (data < top):
281
cp_off = cp_off | (data[0] << 8)
284
cp_off = cp_off | (data[0] << 16)
287
cp_off = cp_off | (data[0] << 24)
293
cp_size = cp_size | (data[0] << 8)
296
cp_size = cp_size | (data[0] << 16)
314
# The helper returns the position after the operand bytes and fills in
# cp_off / cp_size through the two out-pointers.
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
300
315
# cp_off and cp_size are unsigned, so a sum smaller than cp_size means the
# addition wrapped; the second test rejects a copy that runs past the source.
if (cp_off + cp_size < cp_size or
301
316
cp_off + cp_size > source_size or
316
337
# Step over `cmd` bytes of the delta and charge them against the number of
# output bytes still expected (presumably the literal-insert path -- confirm).
data = data + cmd
317
338
size = size - cmd
320
# * cmd == 0 is reserved for future encoding
321
# * extensions. In the mean time we must fail when
322
# * encountering them (might be data corruption).
324
## /* XXX: error("unexpected delta opcode 0"); */
325
raise RuntimeError('Got delta opcode: 0, not supported')
328
341
# After the loop every delta byte must have been consumed and the expected
# output size fully accounted for.
if (data != top or size != 0):
329
## /* XXX: error("delta replay has gone wild"); */
330
342
raise RuntimeError('Did not extract the number of bytes we expected'
331
343
' we were left with %d bytes in "size", and top - data = %d'
332
344
% (size, <int>(top - data)))
335
347
# *dst_size = out - dst_buf;
336
# NOTE(review): the assert and the explicit check below test the same
# condition; the raise presumably supersedes the assert -- confirm.
assert (out - dst_buf) == PyString_GET_SIZE(result)
348
if (out - dst_buf) != PyString_GET_SIZE(result):
349
raise RuntimeError('Number of bytes extracted did not match the'
350
' size encoded in the delta header.')
354
def apply_delta_to_source(source, delta_start, delta_end):
    """Extract a delta from source bytes, and apply it.

    The delta occupies the half-open byte range ``[delta_start, delta_end)``
    of ``source``. Only the bytes before ``delta_start`` are handed to
    ``_apply_delta`` as the text the delta may copy from.

    :param source: An exact ``str`` containing both the base bytes and the
        delta.
    :param delta_start: Offset in ``source`` of the first delta byte.
    :param delta_end: Offset one past the last delta byte.
    :return: The value returned by ``_apply_delta``.
    :raises TypeError: If ``source`` is not an exact ``str``.
    :raises ValueError: If the range is empty, inverted, negative, or extends
        outside ``source``.
    """
    # These must be C pointers: the code below does pointer arithmetic on
    # them and passes them straight to the C-level _apply_delta.
    cdef char *c_source
    cdef Py_ssize_t c_source_size
    cdef char *c_delta
    cdef Py_ssize_t c_delta_size
    cdef Py_ssize_t c_delta_start, c_delta_end

    if not PyString_CheckExact(source):
        raise TypeError('source is not a str')
    c_source_size = PyString_GET_SIZE(source)
    c_delta_start = delta_start
    c_delta_end = delta_end
    if c_delta_start < 0:
        # A negative start would move c_delta before the buffer and pass a
        # negative source size down to _apply_delta.
        raise ValueError('delta starts before source')
    if c_delta_start >= c_source_size:
        raise ValueError('delta starts after source')
    if c_delta_end > c_source_size:
        raise ValueError('delta ends after source')
    if c_delta_start >= c_delta_end:
        raise ValueError('delta starts after it ends')

    c_delta_size = c_delta_end - c_delta_start
    c_source = PyString_AS_STRING(source)
    c_delta = c_source + c_delta_start
    # We don't use source_size, because we know the delta should not refer to
    # any bytes after it starts
    return _apply_delta(c_source, c_delta_start, c_delta, c_delta_size)
382
def encode_base128_int(val):
    """Convert an integer into a 7-bit lsb encoding.

    The value is emitted seven bits at a time, least-significant group first.
    Every byte except the last has its high bit (0x80) set to signal that more
    bytes follow.

    :param val: The integer to encode; it is converted to a C
        ``unsigned int`` before encoding.
    :return: A ``str`` holding the encoded bytes.
    :raises ValueError: If the encoding would not fit in the local buffer.
    """
    cdef unsigned int c_val
    cdef Py_ssize_t count
    cdef unsigned char c_bytes[8] # max size for 32-bit int is 5 bytes

    c_val = val
    count = 0
    while c_val >= 0x80 and count < 8:
        # Low seven bits plus the continuation flag, then drop those bits.
        c_bytes[count] = <unsigned char>((c_val | 0x80) & 0xFF)
        c_val = c_val >> 7
        count = count + 1
    if count >= 8 or c_val >= 0x80:
        raise ValueError('encode_base128_int overflowed the buffer')
    # Final byte: fewer than 8 bits remain, so the continuation flag is clear.
    c_bytes[count] = <unsigned char>(c_val & 0xFF)
    # Count the terminating byte so it is included in the returned string.
    count = count + 1
    return PyString_FromStringAndSize(<char *>c_bytes, count)
402
def decode_base128_int(bytes):
403
"""Decode an integer from a 7-bit lsb encoding."""
406
cdef unsigned int uval
408
cdef Py_ssize_t num_low_bytes
409
cdef unsigned char *c_bytes
414
if not PyString_CheckExact(bytes):
415
raise TypeError('bytes is not a string')
416
c_bytes = <unsigned char*>PyString_AS_STRING(bytes)
417
# We take off 1, because we have to be able to decode the non-expanded byte
418
num_low_bytes = PyString_GET_SIZE(bytes) - 1
419
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
420
val |= (c_bytes[offset] & 0x7F) << shift
423
if c_bytes[offset] & 0x80:
424
raise ValueError('Data not properly formatted, we ran out of'
425
' bytes before 0x80 stopped being set.')
426
val |= c_bytes[offset] << shift
429
uval = <unsigned int> val