17
17
"""Compiled extensions for doing compression."""
20
cdef extern from "python-compat.h":
24
cdef extern from "Python.h":
25
ctypedef int Py_ssize_t # Required for older pyrex versions
26
int PyString_CheckExact(object)
27
char * PyString_AS_STRING(object)
28
Py_ssize_t PyString_GET_SIZE(object)
29
object PyString_FromStringAndSize(char *, Py_ssize_t)
19
32
cdef extern from *:
20
33
ctypedef unsigned long size_t
21
34
void * malloc(size_t)
44
58
void *delta_buf, unsigned long delta_size,
45
59
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
54
62
cdef void *safe_malloc(size_t count) except NULL:
110
118
self._index = NULL
111
119
safe_free(<void **>&self._source_infos)
121
def _has_index(self):
    """Tell whether a delta index is currently held.

    :return: The result of comparing self._index against NULL; true when
        an index is present, false when it is still NULL.
    """
    return self._index != NULL
113
124
def add_delta_source(self, delta, unadded_bytes):
114
125
"""Add a new delta to the source texts.
163
174
source_location = len(self._sources)
164
175
if source_location >= self._max_num_sources:
165
176
self._expand_sources()
177
if source_location != 0 and self._index == NULL:
178
# We were lazy about populating the index, create it now
179
self._populate_first_index()
166
180
self._sources.append(source)
167
181
c_source = PyString_AS_STRING(source)
168
182
c_source_size = PyString_GET_SIZE(source)
171
185
src.size = c_source_size
173
187
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
175
188
self._source_offset = src.agg_offset + src.size
177
free_delta_index(self._index)
189
# We delay creating the index on the first insert
190
if source_location != 0:
191
index = create_delta_index(src, self._index)
193
free_delta_index(self._index)
196
cdef _populate_first_index(self):
    """Create the delta index for the first source.

    Index creation is delayed when the first source is inserted; this
    builds it from self._source_infos[0] once it is actually needed.

    :raises AssertionError: if there is not exactly one source, if an
        index already exists, or if create_delta_index() returns NULL.
    """
    if len(self._sources) != 1 or self._index != NULL:
        raise AssertionError('_populate_first_index should only be'
            ' called when we have a single source and no index yet')

    # We know that self._index is already NULL, so whatever
    # create_delta_index returns is fine
    self._index = create_delta_index(&self._source_infos[0], NULL)
    # Checked with an explicit raise rather than 'assert' so that the
    # check is still performed when assertions are disabled.
    if self._index == NULL:
        raise AssertionError('create_delta_index returned NULL while'
            ' building the index for the first source')
180
207
cdef _expand_sources(self):
181
208
raise RuntimeError('if we move self._source_infos, then we need to'
193
220
cdef unsigned long delta_size
195
222
if self._index == NULL:
223
if len(self._sources) == 0:
225
# We were just lazy about generating the index
226
self._populate_first_index()
198
228
if not PyString_CheckExact(target_bytes):
199
229
raise TypeError('target is not a str')
245
275
return _apply_delta(source, source_size, delta, delta_size)
278
cdef unsigned char *_decode_copy_instruction(unsigned char *bytes,
279
unsigned char cmd, unsigned int *offset, unsigned int *length):
280
"""Decode a copy instruction from the next few bytes.
282
A copy instruction is a variable number of bytes, so we will parse the
283
bytes we care about, and return the new position, as well as the offset and
284
length referred to in the bytes.
286
:param bytes: Pointer to the start of bytes after cmd
287
:param cmd: The command code
288
:return: Pointer to the bytes just after the last decode byte
290
cdef unsigned int off, size, count
298
off = off | (bytes[count] << 8)
301
off = off | (bytes[count] << 16)
304
off = off | (bytes[count] << 24)
310
size = size | (bytes[count] << 8)
313
size = size | (bytes[count] << 16)
248
322
cdef object _apply_delta(char *source, Py_ssize_t source_size,
249
323
char *delta, Py_ssize_t delta_size):
250
324
"""common functionality between apply_delta and apply_delta_to_source."""
251
325
cdef unsigned char *data, *top
252
326
cdef unsigned char *dst_buf, *out, cmd
253
327
cdef Py_ssize_t size
254
cdef unsigned long cp_off, cp_size
328
cdef unsigned int cp_off, cp_size
256
330
data = <unsigned char *>delta
257
331
top = data + delta_size
260
334
size = get_delta_hdr_size(&data, top)
261
335
result = PyString_FromStringAndSize(NULL, size)
262
336
dst_buf = <unsigned char*>PyString_AS_STRING(result)
263
# XXX: The original code added a trailing null here, but this shouldn't be
264
# necessary when using PyString_FromStringAndSize
268
339
while (data < top):
277
cp_off = cp_off | (data[0] << 8)
280
cp_off = cp_off | (data[0] << 16)
283
cp_off = cp_off | (data[0] << 24)
289
cp_size = cp_size | (data[0] << 8)
292
cp_size = cp_size | (data[0] << 16)
344
data = _decode_copy_instruction(data, cmd, &cp_off, &cp_size)
296
345
if (cp_off + cp_size < cp_size or
297
346
cp_off + cp_size > source_size or
303
352
memcpy(out, source + cp_off, cp_size)
304
353
out = out + cp_size
305
354
size = size - cp_size
358
# cmd == 0 is reserved for future encoding
359
# extensions. In the mean time we must fail when
360
# encountering them (might be data corruption).
361
raise RuntimeError('Got delta opcode: 0, not supported')
308
363
raise RuntimeError('Insert instruction longer than remaining'
309
364
' bytes: %d > %d' % (cmd, size))
312
367
data = data + cmd
313
368
size = size - cmd
316
# * cmd == 0 is reserved for future encoding
317
# * extensions. In the mean time we must fail when
318
# * encountering them (might be data corruption).
320
## /* XXX: error("unexpected delta opcode 0"); */
321
raise RuntimeError('Got delta opcode: 0, not supported')
324
371
if (data != top or size != 0):
325
## /* XXX: error("delta replay has gone wild"); */
326
372
raise RuntimeError('Did not extract the number of bytes we expected'
327
373
' we were left with %d bytes in "size", and top - data = %d'
328
374
% (size, <int>(top - data)))
331
377
# *dst_size = out - dst_buf;
332
assert (out - dst_buf) == PyString_GET_SIZE(result)
378
if (out - dst_buf) != PyString_GET_SIZE(result):
379
raise RuntimeError('Number of bytes extracted did not match the'
380
' size encoded in the delta header.')
399
447
# We take off 1, because we have to be able to decode the non-expanded byte
400
448
num_low_bytes = PyString_GET_SIZE(bytes) - 1
401
449
while (c_bytes[offset] & 0x80) and offset < num_low_bytes:
402
val |= (c_bytes[offset] & 0x7F) << shift
450
val = val | ((c_bytes[offset] & 0x7F) << shift)
403
451
shift = shift + 7
404
452
offset = offset + 1
405
453
if c_bytes[offset] & 0x80:
406
454
raise ValueError('Data not properly formatted, we ran out of'
407
455
' bytes before 0x80 stopped being set.')
408
val |= c_bytes[offset] << shift
456
val = val | (c_bytes[offset] << shift)
409
457
offset = offset + 1
411
459
uval = <unsigned int> val