~bzr-pqm/bzr/bzr.dev

0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
1
# Copyright (C) 2008 Canonical Limited.
2
# 
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
6
# 
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
# GNU General Public License for more details.
11
# 
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
15
# 
16
17
"""Compiled extensions for doing compression."""
18
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
19
cdef extern from *:
20
    ctypedef unsigned long size_t
21
    void * malloc(size_t)
0.18.23 by John Arbash Meinel
Now we can add more lines without having to rebuild the whole hash
22
    void * realloc(void *, size_t)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
23
    void free(void *)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
24
    void memcpy(void *, void *, size_t)
25
26
cdef extern from "delta.h":
0.23.42 by John Arbash Meinel
Change the code around again.
27
    struct source_info:
28
        void *buf
29
        unsigned long size
30
        unsigned long agg_offset
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
31
    struct delta_index:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
32
        pass
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
33
    delta_index * create_delta_index(source_info *src, delta_index *old)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
34
    delta_index * create_delta_index_from_delta(source_info *delta,
35
                                                delta_index *old)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
36
    void free_delta_index(delta_index *index)
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
37
    void *create_delta(delta_index *indexes,
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
38
             void *buf, unsigned long bufsize,
39
             unsigned long *delta_size, unsigned long max_delta_size)
40
    unsigned long get_delta_hdr_size(unsigned char **datap,
41
                                     unsigned char *top)
42
    Py_ssize_t DELTA_SIZE_MIN
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
43
    void *patch_delta(void *src_buf, unsigned long src_size,
44
                      void *delta_buf, unsigned long delta_size,
45
                      unsigned long *dst_size)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
46
47
cdef extern from "Python.h":
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
48
    int PyString_CheckExact(object)
49
    char * PyString_AS_STRING(object)
50
    Py_ssize_t PyString_GET_SIZE(object)
51
    object PyString_FromStringAndSize(char *, Py_ssize_t)
52
53
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
54
cdef void *safe_malloc(size_t count) except NULL:
55
    cdef void *result
56
    result = malloc(count)
57
    if result == NULL:
58
        raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
59
    return result
60
61
62
cdef void *safe_realloc(void * old, size_t count) except NULL:
63
    cdef void *result
64
    result = realloc(old, count)
65
    if result == NULL:
66
        raise MemoryError('Failed to reallocate to %d bytes of memory'
67
                          % (count,))
68
    return result
69
70
71
cdef int safe_free(void **val) except -1:
72
    assert val != NULL
73
    if val[0] != NULL:
74
        free(val[0])
75
        val[0] = NULL
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
76
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
77
def make_delta_index(source):
78
    return DeltaIndex(source)
79
80
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
81
cdef class DeltaIndex:
82
0.23.40 by John Arbash Meinel
Add a comment why we aren't using the list type for _sources
83
    # We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
84
    # isn't performance critical
85
    # cdef readonly list _sources
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
86
    cdef readonly object _sources
0.23.42 by John Arbash Meinel
Change the code around again.
87
    cdef source_info *_source_infos
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
88
    cdef delta_index *_index
0.23.42 by John Arbash Meinel
Change the code around again.
89
    cdef readonly unsigned int _max_num_sources
0.23.32 by John Arbash Meinel
Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.
90
    cdef public unsigned long _source_offset
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
91
92
    def __repr__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
93
        return '%s(%d, %d)' % (self.__class__.__name__,
94
            len(self._sources), self._source_offset)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
95
96
    def __init__(self, source=None):
97
        self._sources = []
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
98
        self._index = NULL
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
99
        self._max_num_sources = 65000
0.23.42 by John Arbash Meinel
Change the code around again.
100
        self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
101
                                                        * self._max_num_sources)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
102
        self._source_offset = 0
103
104
        if source is not None:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
105
            self.add_source(source, 0)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
106
107
    def __dealloc__(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
108
        if self._index != NULL:
109
            free_delta_index(self._index)
110
            self._index = NULL
0.23.42 by John Arbash Meinel
Change the code around again.
111
        safe_free(<void **>&self._source_infos)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
112
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
113
    def add_delta_source(self, delta, unadded_bytes):
114
        """Add a new delta to the source texts.
115
116
        :param delta: The text of the delta, this must be a byte string.
117
        :param unadded_bytes: Number of bytes that were added to the source
118
            that were not indexed.
119
        """
120
        cdef char *c_delta
121
        cdef Py_ssize_t c_delta_size
122
        cdef delta_index *index
123
        cdef unsigned int source_location
124
        cdef source_info *src
125
        cdef unsigned int num_indexes
126
127
        if not PyString_CheckExact(delta):
128
            raise TypeError('delta is not a str')
129
130
        source_location = len(self._sources)
131
        if source_location >= self._max_num_sources:
132
            self._expand_sources()
133
        self._sources.append(delta)
134
        c_delta = PyString_AS_STRING(delta)
135
        c_delta_size = PyString_GET_SIZE(delta)
136
        src = self._source_infos + source_location
137
        src.buf = c_delta
138
        src.size = c_delta_size
139
        src.agg_offset = self._source_offset + unadded_bytes
140
        index = create_delta_index_from_delta(src, self._index)
141
        self._source_offset = src.agg_offset + src.size
0.23.49 by John Arbash Meinel
When adding new entries to the delta index, use memcpy
142
        if index != NULL:
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
143
            free_delta_index(self._index)
144
            self._index = index
145
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
146
    def add_source(self, source, unadded_bytes):
147
        """Add a new bit of source text to the delta indexes.
148
149
        :param source: The text in question, this must be a byte string
150
        :param unadded_bytes: Assume there are this many bytes that didn't get
151
            added between this source and the end of the previous source.
152
        """
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
153
        cdef char *c_source
154
        cdef Py_ssize_t c_source_size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
155
        cdef delta_index *index
0.23.42 by John Arbash Meinel
Change the code around again.
156
        cdef unsigned int source_location
157
        cdef source_info *src
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
158
        cdef unsigned int num_indexes
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
159
160
        if not PyString_CheckExact(source):
161
            raise TypeError('source is not a str')
162
0.23.42 by John Arbash Meinel
Change the code around again.
163
        source_location = len(self._sources)
164
        if source_location >= self._max_num_sources:
165
            self._expand_sources()
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
166
        self._sources.append(source)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
167
        c_source = PyString_AS_STRING(source)
168
        c_source_size = PyString_GET_SIZE(source)
0.23.42 by John Arbash Meinel
Change the code around again.
169
        src = self._source_infos + source_location
170
        src.buf = c_source
171
        src.size = c_source_size
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
172
0.23.42 by John Arbash Meinel
Change the code around again.
173
        src.agg_offset = self._source_offset + unadded_bytes
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
174
        index = create_delta_index(src, self._index)
0.23.42 by John Arbash Meinel
Change the code around again.
175
        self._source_offset = src.agg_offset + src.size
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
176
        if index != NULL:
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
177
            free_delta_index(self._index)
178
            self._index = index
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
179
0.23.42 by John Arbash Meinel
Change the code around again.
180
    cdef _expand_sources(self):
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
181
        raise RuntimeError('if we move self._source_infos, then we need to'
182
                           ' change all of the index pointers as well.')
0.23.42 by John Arbash Meinel
Change the code around again.
183
        self._max_num_sources = self._max_num_sources * 2
184
        self._source_infos = <source_info *>safe_realloc(self._source_infos,
185
                                                sizeof(source_info)
186
                                                * self._max_num_sources)
187
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
188
    def make_delta(self, target_bytes, max_delta_size=0):
189
        """Create a delta from the current source to the target bytes."""
190
        cdef char *target
191
        cdef Py_ssize_t target_size
192
        cdef void * delta
193
        cdef unsigned long delta_size
194
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
195
        if self._index == NULL:
0.23.15 by John Arbash Meinel
Handle when self._index is NULL, mostly because the source text was the empty strig.
196
            return None
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
197
198
        if not PyString_CheckExact(target_bytes):
199
            raise TypeError('target is not a str')
200
201
        target = PyString_AS_STRING(target_bytes)
202
        target_size = PyString_GET_SIZE(target_bytes)
203
204
        # TODO: inline some of create_delta so we at least don't have to double
205
        #       malloc, and can instead use PyString_FromStringAndSize, to
206
        #       allocate the bytes into the final string
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
207
        delta = create_delta(self._index,
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
208
                             target, target_size,
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
209
                             &delta_size, max_delta_size)
210
        result = None
211
        if delta:
212
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
213
            free(delta)
214
        return result
215
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
216
217
def make_delta(source_bytes, target_bytes):
0.23.42 by John Arbash Meinel
Change the code around again.
218
    """Create a delta, this is a wrapper around DeltaIndex.make_delta."""
219
    di = DeltaIndex(source_bytes)
220
    return di.make_delta(target_bytes)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
221
222
223
def apply_delta(source_bytes, delta_bytes):
224
    """Apply a delta generated by make_delta to source_bytes."""
225
    cdef char *source
226
    cdef Py_ssize_t source_size
227
    cdef char *delta
228
    cdef Py_ssize_t delta_size
229
    cdef unsigned char *data, *top
230
    cdef unsigned char *dst_buf, *out, cmd
231
    cdef Py_ssize_t size
232
    cdef unsigned long cp_off, cp_size
233
234
    if not PyString_CheckExact(source_bytes):
235
        raise TypeError('source is not a str')
236
    if not PyString_CheckExact(delta_bytes):
237
        raise TypeError('delta is not a str')
238
239
    source = PyString_AS_STRING(source_bytes)
240
    source_size = PyString_GET_SIZE(source_bytes)
241
    delta = PyString_AS_STRING(delta_bytes)
242
    delta_size = PyString_GET_SIZE(delta_bytes)
243
244
    # Code taken from patch-delta.c, only brought here to give better error
245
    # handling, and to avoid double allocating memory
246
    if (delta_size < DELTA_SIZE_MIN):
247
        # XXX: Invalid delta block
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
248
        raise RuntimeError('delta_size %d smaller than min delta size %d'
249
                           % (delta_size, DELTA_SIZE_MIN))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
250
251
    data = <unsigned char *>delta
252
    top = data + delta_size
253
254
    # make sure the orig file size matches what we expect
255
    # XXX: gcc warns because data isn't defined as 'const'
256
    size = get_delta_hdr_size(&data, top)
0.23.10 by John Arbash Meinel
Allowing the source bytes to be longer than expected.
257
    if (size > source_size):
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
258
        # XXX: mismatched source size
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
259
        raise RuntimeError('source size %d < expected source size %d'
260
                           % (source_size, size))
0.23.10 by John Arbash Meinel
Allowing the source bytes to be longer than expected.
261
    source_size = size
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
262
263
    # now the result size
264
    size = get_delta_hdr_size(&data, top)
265
    result = PyString_FromStringAndSize(NULL, size)
266
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
267
    # XXX: The original code added a trailing null here, but this shouldn't be
268
    #      necessary when using PyString_FromStringAndSize
269
    # dst_buf[size] = 0
270
271
    out = dst_buf
272
    while (data < top):
273
        cmd = data[0]
274
        data = data + 1
275
        if (cmd & 0x80):
276
            cp_off = cp_size = 0
277
            if (cmd & 0x01):
278
                cp_off = data[0]
279
                data = data + 1
280
            if (cmd & 0x02):
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
281
                cp_off = cp_off | (data[0] << 8)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
282
                data = data + 1
283
            if (cmd & 0x04):
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
284
                cp_off = cp_off | (data[0] << 16)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
285
                data = data + 1
286
            if (cmd & 0x08):
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
287
                cp_off = cp_off | (data[0] << 24)
288
                data = data + 1
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
289
            if (cmd & 0x10):
290
                cp_size = data[0]
291
                data = data + 1
292
            if (cmd & 0x20):
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
293
                cp_size = cp_size | (data[0] << 8)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
294
                data = data + 1
295
            if (cmd & 0x40):
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
296
                cp_size = cp_size | (data[0] << 16)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
297
                data = data + 1
298
            if (cp_size == 0):
299
                cp_size = 0x10000
300
            if (cp_off + cp_size < cp_size or
301
                cp_off + cp_size > source_size or
302
                cp_size > size):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
303
                raise RuntimeError('Something wrong with:'
304
                    ' cp_off = %s, cp_size = %s'
305
                    ' source_size = %s, size = %s'
306
                    % (cp_off, cp_size, source_size, size))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
307
            memcpy(out, source + cp_off, cp_size)
308
            out = out + cp_size
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
309
            size = size - cp_size
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
310
        elif (cmd):
311
            if (cmd > size):
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
312
                raise RuntimeError('Insert instruction longer than remaining'
313
                    ' bytes: %d > %d' % (cmd, size))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
314
            memcpy(out, data, cmd)
315
            out = out + cmd
316
            data = data + cmd
0.24.1 by John Arbash Meinel
Make the groupcompress pyrex extension compatible with pyrex 0.9.6.4
317
            size = size - cmd
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
318
        else:
319
            # /*
320
            #  * cmd == 0 is reserved for future encoding
321
            #  * extensions. In the mean time we must fail when
322
            #  * encountering them (might be data corruption).
323
            #  */
324
            ## /* XXX: error("unexpected delta opcode 0"); */
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
325
            raise RuntimeError('Got delta opcode: 0, not supported')
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
326
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
327
    # /* sanity check */
328
    if (data != top or size != 0):
329
        ## /* XXX: error("delta replay has gone wild"); */
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
330
        raise RuntimeError('Did not extract the number of bytes we expected'
331
            ' we were left with %d bytes in "size", and top - data = %d'
332
            % (size, <int>(top - data)))
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
333
        return None
334
335
    # *dst_size = out - dst_buf;
336
    assert (out - dst_buf) == PyString_GET_SIZE(result)
337
    return result