1
# Copyright (C) 2009 Canonical Limited.
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
# GNU General Public License for more details.
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Compiled extensions for doing compression."""
20
ctypedef unsigned long size_t
22
void * realloc(void *, size_t)
24
void memcpy(void *, void *, size_t)
26
cdef extern from "delta.h":
30
unsigned long agg_offset
33
delta_index * create_delta_index(source_info *src, delta_index *old)
34
delta_index * create_delta_index_from_delta(source_info *delta,
36
void free_delta_index(delta_index *index)
37
void *create_delta(delta_index *indexes,
38
void *buf, unsigned long bufsize,
39
unsigned long *delta_size, unsigned long max_delta_size)
40
unsigned long get_delta_hdr_size(unsigned char **datap,
42
Py_ssize_t DELTA_SIZE_MIN
43
void *patch_delta(void *src_buf, unsigned long src_size,
44
void *delta_buf, unsigned long delta_size,
45
unsigned long *dst_size)
47
cdef extern from "Python.h":
48
int PyString_CheckExact(object)
49
char * PyString_AS_STRING(object)
50
Py_ssize_t PyString_GET_SIZE(object)
51
object PyString_FromStringAndSize(char *, Py_ssize_t)
54
cdef void *safe_malloc(size_t count) except NULL:
56
result = malloc(count)
58
raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
62
cdef void *safe_realloc(void * old, size_t count) except NULL:
64
result = realloc(old, count)
66
raise MemoryError('Failed to reallocate to %d bytes of memory'
71
cdef int safe_free(void **val) except -1:
77
def make_delta_index(source):
    """Return a new DeltaIndex constructed from ``source``.

    :param source: Passed straight through to the DeltaIndex constructor.
    :return: The freshly created DeltaIndex instance.
    """
    index = DeltaIndex(source)
    return index
81
cdef class DeltaIndex:
83
# We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
84
# isn't performance critical
85
# cdef readonly list _sources
86
cdef readonly object _sources
87
cdef source_info *_source_infos
88
cdef delta_index *_index
89
cdef readonly unsigned int _max_num_sources
90
cdef public unsigned long _source_offset
93
return '%s(%d, %d)' % (self.__class__.__name__,
94
len(self._sources), self._source_offset)
96
def __init__(self, source=None):
99
self._max_num_sources = 65000
100
self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
101
* self._max_num_sources)
102
self._source_offset = 0
104
if source is not None:
105
self.add_source(source, 0)
107
def __dealloc__(self):
108
if self._index != NULL:
109
free_delta_index(self._index)
111
safe_free(<void **>&self._source_infos)
113
def add_delta_source(self, delta, unadded_bytes):
114
"""Add a new delta to the source texts.
116
:param delta: The text of the delta, this must be a byte string.
117
:param unadded_bytes: Number of bytes that were added to the source
118
that were not indexed.
121
cdef Py_ssize_t c_delta_size
122
cdef delta_index *index
123
cdef unsigned int source_location
124
cdef source_info *src
125
cdef unsigned int num_indexes
127
if not PyString_CheckExact(delta):
128
raise TypeError('delta is not a str')
130
source_location = len(self._sources)
131
if source_location >= self._max_num_sources:
132
self._expand_sources()
133
self._sources.append(delta)
134
c_delta = PyString_AS_STRING(delta)
135
c_delta_size = PyString_GET_SIZE(delta)
136
src = self._source_infos + source_location
138
src.size = c_delta_size
139
src.agg_offset = self._source_offset + unadded_bytes
140
index = create_delta_index_from_delta(src, self._index)
141
self._source_offset = src.agg_offset + src.size
143
free_delta_index(self._index)
146
def add_source(self, source, unadded_bytes):
147
"""Add a new bit of source text to the delta indexes.
149
:param source: The text in question, this must be a byte string
150
:param unadded_bytes: Assume there are this many bytes that didn't get
151
added between this source and the end of the previous source.
154
cdef Py_ssize_t c_source_size
155
cdef delta_index *index
156
cdef unsigned int source_location
157
cdef source_info *src
158
cdef unsigned int num_indexes
160
if not PyString_CheckExact(source):
161
raise TypeError('source is not a str')
163
source_location = len(self._sources)
164
if source_location >= self._max_num_sources:
165
self._expand_sources()
166
self._sources.append(source)
167
c_source = PyString_AS_STRING(source)
168
c_source_size = PyString_GET_SIZE(source)
169
src = self._source_infos + source_location
171
src.size = c_source_size
173
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
175
self._source_offset = src.agg_offset + src.size
177
free_delta_index(self._index)
180
cdef _expand_sources(self):
181
raise RuntimeError('if we move self._source_infos, then we need to'
182
' change all of the index pointers as well.')
183
self._max_num_sources = self._max_num_sources * 2
184
self._source_infos = <source_info *>safe_realloc(self._source_infos,
186
* self._max_num_sources)
188
def make_delta(self, target_bytes, max_delta_size=0):
189
"""Create a delta from the current source to the target bytes."""
191
cdef Py_ssize_t target_size
193
cdef unsigned long delta_size
195
if self._index == NULL:
198
if not PyString_CheckExact(target_bytes):
199
raise TypeError('target is not a str')
201
target = PyString_AS_STRING(target_bytes)
202
target_size = PyString_GET_SIZE(target_bytes)
204
# TODO: inline some of create_delta so we at least don't have to double
205
# malloc, and can instead use PyString_FromStringAndSize, to
206
# allocate the bytes into the final string
207
delta = create_delta(self._index,
209
&delta_size, max_delta_size)
212
result = PyString_FromStringAndSize(<char *>delta, delta_size)
217
def make_delta(source_bytes, target_bytes):
    """Create a delta from source_bytes to target_bytes.

    Convenience wrapper: builds a one-off DeltaIndex over ``source_bytes``
    and returns whatever its ``make_delta(target_bytes)`` call produces.

    :param source_bytes: Handed to the DeltaIndex constructor as the source.
    :param target_bytes: Handed to DeltaIndex.make_delta as the target.
    :return: The result of DeltaIndex.make_delta for ``target_bytes``.
    """
    return DeltaIndex(source_bytes).make_delta(target_bytes)
223
def apply_delta(source_bytes, delta_bytes):
224
"""Apply a delta generated by make_delta to source_bytes."""
226
cdef Py_ssize_t source_size
228
cdef Py_ssize_t delta_size
229
cdef unsigned char *data, *top
230
cdef unsigned char *dst_buf, *out, cmd
232
cdef unsigned long cp_off, cp_size
234
if not PyString_CheckExact(source_bytes):
235
raise TypeError('source is not a str')
236
if not PyString_CheckExact(delta_bytes):
237
raise TypeError('delta is not a str')
239
source = PyString_AS_STRING(source_bytes)
240
source_size = PyString_GET_SIZE(source_bytes)
241
delta = PyString_AS_STRING(delta_bytes)
242
delta_size = PyString_GET_SIZE(delta_bytes)
244
# Code taken from patch-delta.c, only brought here to give better error
245
# handling, and to avoid double allocating memory
246
if (delta_size < DELTA_SIZE_MIN):
247
# XXX: Invalid delta block
248
raise RuntimeError('delta_size %d smaller than min delta size %d'
249
% (delta_size, DELTA_SIZE_MIN))
251
data = <unsigned char *>delta
252
top = data + delta_size
254
# make sure the orig file size matches what we expect
255
# XXX: gcc warns because data isn't defined as 'const'
256
size = get_delta_hdr_size(&data, top)
257
if (size > source_size):
258
# XXX: mismatched source size
259
raise RuntimeError('source size %d < expected source size %d'
260
% (source_size, size))
263
# now the result size
264
size = get_delta_hdr_size(&data, top)
265
result = PyString_FromStringAndSize(NULL, size)
266
dst_buf = <unsigned char*>PyString_AS_STRING(result)
267
# XXX: The original code added a trailing null here, but this shouldn't be
268
# necessary when using PyString_FromStringAndSize
281
cp_off = cp_off | (data[0] << 8)
284
cp_off = cp_off | (data[0] << 16)
287
cp_off = cp_off | (data[0] << 24)
293
cp_size = cp_size | (data[0] << 8)
296
cp_size = cp_size | (data[0] << 16)
300
if (cp_off + cp_size < cp_size or
301
cp_off + cp_size > source_size or
303
raise RuntimeError('Something wrong with:'
304
' cp_off = %s, cp_size = %s'
305
' source_size = %s, size = %s'
306
% (cp_off, cp_size, source_size, size))
307
memcpy(out, source + cp_off, cp_size)
309
size = size - cp_size
312
raise RuntimeError('Insert instruction longer than remaining'
313
' bytes: %d > %d' % (cmd, size))
314
memcpy(out, data, cmd)
320
# * cmd == 0 is reserved for future encoding
321
# * extensions. In the mean time we must fail when
322
# * encountering them (might be data corruption).
324
## /* XXX: error("unexpected delta opcode 0"); */
325
raise RuntimeError('Got delta opcode: 0, not supported')
328
if (data != top or size != 0):
329
## /* XXX: error("delta replay has gone wild"); */
330
raise RuntimeError('Did not extract the number of bytes we expected'
331
' we were left with %d bytes in "size", and top - data = %d'
332
% (size, <int>(top - data)))
335
# *dst_size = out - dst_buf;
336
assert (out - dst_buf) == PyString_GET_SIZE(result)