~bzr-pqm/bzr/bzr.dev

0.18.13 by John Arbash Meinel
Copy the EquivalenceTable code into pyrex and get it under test.
1
# Copyright (C) 2008 Canonical Limited.
2
# 
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 2 as published
5
# by the Free Software Foundation.
6
# 
7
# This program is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
# GNU General Public License for more details.
11
# 
12
# You should have received a copy of the GNU General Public License
13
# along with this program; if not, write to the Free Software
14
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
15
# 
16
17
"""Compiled extensions for doing compression."""
18
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
19
cdef extern from *:
20
    ctypedef unsigned long size_t
21
    void * malloc(size_t)
0.18.23 by John Arbash Meinel
Now we can add more lines without having to rebuild the whole hash
22
    void * realloc(void *, size_t)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
23
    void free(void *)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
24
    void memcpy(void *, void *, size_t)
25
26
cdef extern from "delta.h":
27
    void * diff_delta(void *src_buf, unsigned long src_bufsize,
28
           void *trg_buf, unsigned long trg_bufsize,
29
           unsigned long *delta_size, unsigned long max_delta_size)
30
    struct delta_index:
31
        unsigned long memsize
32
        void *src_buf
33
        unsigned long src_size
34
        unsigned int hash_mask
35
        # struct index_entry *hash[]
36
    delta_index * create_delta_index(void *buf, unsigned long bufsize)
37
    void free_delta_index(delta_index *index)
38
    void * create_delta(delta_index *index,
39
             void *buf, unsigned long bufsize,
40
             unsigned long *delta_size, unsigned long max_delta_size)
41
    unsigned long get_delta_hdr_size(unsigned char **datap,
42
                                     unsigned char *top)
43
    Py_ssize_t DELTA_SIZE_MIN
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
44
    void *patch_delta(void *src_buf, unsigned long src_size,
45
                      void *delta_buf, unsigned long delta_size,
46
                      unsigned long *dst_size)
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
47
48
cdef extern from "Python.h":
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
49
    int PyString_CheckExact(object)
50
    char * PyString_AS_STRING(object)
51
    Py_ssize_t PyString_GET_SIZE(object)
52
    object PyString_FromStringAndSize(char *, Py_ssize_t)
53
54
55
# cdef void *safe_malloc(size_t count) except NULL:
56
#     cdef void *result
57
#     result = malloc(count)
58
#     if result == NULL:
59
#         raise MemoryError('Failed to allocate %d bytes of memory' % (count,))
60
#     return result
61
# 
62
# 
63
# cdef void *safe_realloc(void * old, size_t count) except NULL:
64
#     cdef void *result
65
#     result = realloc(old, count)
66
#     if result == NULL:
67
#         raise MemoryError('Failed to reallocate to %d bytes of memory'
68
#                           % (count,))
69
#     return result
70
# 
71
# 
72
# cdef int safe_free(void **val) except -1:
73
#     assert val != NULL
74
#     if val[0] != NULL:
75
#         free(val[0])
76
#         val[0] = NULL
77
0.23.17 by John Arbash Meinel
Create a wrapper function, so that lsprof will properly attribute time spent.
78
def make_delta_index(source):
79
    return DeltaIndex(source)
80
81
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
82
cdef class DeltaIndex:
83
84
    cdef object _source
85
    cdef delta_index *_index
86
87
    def __repr__(self):
88
        if self._index == NULL:
89
            return '%s(NULL)' % (self.__class__.__name__,)
90
        return '%s(%d)' % (self.__class__.__name__,
91
            len(self._source))
92
93
    def __init__(self, source):
94
        self._source = None
95
        self._index = NULL
96
        self._create_delta_index(source)
97
98
    def _create_delta_index(self, source):
99
        cdef char *c_source
100
        cdef Py_ssize_t c_source_size
101
102
        if not PyString_CheckExact(source):
103
            raise TypeError('source is not a str')
104
105
        self._source = source
106
        c_source = PyString_AS_STRING(source)
107
        c_source_size = PyString_GET_SIZE(source)
108
109
        # TODO: Are usage is ultimately going to be different than the one that
110
        #       was originally designed. Specifically, we are going to want to
111
        #       be able to update the index by hashing future data. It should
112
        #       fit just fine into the structure. But for now, we just wrap
113
        #       create_delta_index (For example, we could always reserve enough
114
        #       space to hash a 4MB string, etc.)
115
        self._index = create_delta_index(c_source, c_source_size)
116
        # TODO: Handle if _index == NULL
117
118
    cdef _ensure_no_index(self):
119
        if self._index != NULL:
120
            free_delta_index(self._index)
121
            self._index = NULL
122
123
    def __dealloc__(self):
124
        self._ensure_no_index()
125
126
    def make_delta(self, target_bytes, max_delta_size=0):
127
        """Create a delta from the current source to the target bytes."""
128
        cdef char *target
129
        cdef Py_ssize_t target_size
130
        cdef void * delta
131
        cdef unsigned long delta_size
132
0.23.15 by John Arbash Meinel
Handle when self._index is NULL, mostly because the source text was the empty strig.
133
        if self._index == NULL:
134
            return None
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
135
136
        if not PyString_CheckExact(target_bytes):
137
            raise TypeError('target is not a str')
138
139
        target = PyString_AS_STRING(target_bytes)
140
        target_size = PyString_GET_SIZE(target_bytes)
141
142
        # TODO: inline some of create_delta so we at least don't have to double
143
        #       malloc, and can instead use PyString_FromStringAndSize, to
144
        #       allocate the bytes into the final string
145
        delta = create_delta(self._index, target, target_size,
146
                             &delta_size, max_delta_size)
147
        result = None
148
        if delta:
149
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
150
            free(delta)
151
        return result
152
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
153
154
def make_delta(source_bytes, target_bytes):
155
    """Create a delta from source_bytes => target_bytes."""
156
    cdef char *source
157
    cdef Py_ssize_t source_size
158
    cdef char *target
159
    cdef Py_ssize_t target_size
160
    cdef delta_index *index
161
    cdef void * delta
162
    cdef unsigned long delta_size
163
    cdef unsigned long max_delta_size
164
165
    max_delta_size = 0 # Unlimited
166
167
    if not PyString_CheckExact(source_bytes):
168
        raise TypeError('source is not a str')
169
    if not PyString_CheckExact(target_bytes):
170
        raise TypeError('target is not a str')
171
172
    source = PyString_AS_STRING(source_bytes)
173
    source_size = PyString_GET_SIZE(source_bytes)
174
    target = PyString_AS_STRING(target_bytes)
175
    target_size = PyString_GET_SIZE(target_bytes)
176
177
    result = None
178
    index = create_delta_index(source, source_size)
179
    if index != NULL:
180
        delta = create_delta(index, target, target_size,
181
                             &delta_size, max_delta_size)
182
        free_delta_index(index);
183
        if delta:
184
            result = PyString_FromStringAndSize(<char *>delta, delta_size)
185
            free(delta)
186
    return result
187
188
189
def apply_delta(source_bytes, delta_bytes):
190
    """Apply a delta generated by make_delta to source_bytes."""
191
    cdef char *source
192
    cdef Py_ssize_t source_size
193
    cdef char *delta
194
    cdef Py_ssize_t delta_size
195
    cdef unsigned char *data, *top
196
    cdef unsigned char *dst_buf, *out, cmd
197
    cdef Py_ssize_t size
198
    cdef unsigned long cp_off, cp_size
199
200
    if not PyString_CheckExact(source_bytes):
201
        raise TypeError('source is not a str')
202
    if not PyString_CheckExact(delta_bytes):
203
        raise TypeError('delta is not a str')
204
205
    source = PyString_AS_STRING(source_bytes)
206
    source_size = PyString_GET_SIZE(source_bytes)
207
    delta = PyString_AS_STRING(delta_bytes)
208
    delta_size = PyString_GET_SIZE(delta_bytes)
209
210
    # Code taken from patch-delta.c, only brought here to give better error
211
    # handling, and to avoid double allocating memory
212
    if (delta_size < DELTA_SIZE_MIN):
213
        # XXX: Invalid delta block
214
        return None
215
216
    data = <unsigned char *>delta
217
    top = data + delta_size
218
219
    # make sure the orig file size matches what we expect
220
    # XXX: gcc warns because data isn't defined as 'const'
221
    size = get_delta_hdr_size(&data, top)
0.23.10 by John Arbash Meinel
Allowing the source bytes to be longer than expected.
222
    if (size > source_size):
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
223
        # XXX: mismatched source size
224
        return None
0.23.10 by John Arbash Meinel
Allowing the source bytes to be longer than expected.
225
    source_size = size
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
226
227
    # now the result size
228
    size = get_delta_hdr_size(&data, top)
229
    result = PyString_FromStringAndSize(NULL, size)
230
    dst_buf = <unsigned char*>PyString_AS_STRING(result)
231
    # XXX: The original code added a trailing null here, but this shouldn't be
232
    #      necessary when using PyString_FromStringAndSize
233
    # dst_buf[size] = 0
234
235
    out = dst_buf
236
    while (data < top):
237
        cmd = data[0]
238
        data = data + 1
239
        if (cmd & 0x80):
240
            cp_off = cp_size = 0
241
            if (cmd & 0x01):
242
                cp_off = data[0]
243
                data = data + 1
244
            if (cmd & 0x02):
245
                cp_off |= (data[0] << 8)
246
                data = data + 1
247
            if (cmd & 0x04):
248
                cp_off |= (data[0] << 16)
249
                data = data + 1
250
            if (cmd & 0x08):
251
                cp_off |= (data[0] << 24)
252
                data[0] += 1
253
            if (cmd & 0x10):
254
                cp_size = data[0]
255
                data = data + 1
256
            if (cmd & 0x20):
257
                cp_size |= (data[0] << 8)
258
                data = data + 1
259
            if (cmd & 0x40):
260
                cp_size |= (data[0] << 16)
261
                data = data + 1
262
            if (cp_size == 0):
263
                cp_size = 0x10000
264
            if (cp_off + cp_size < cp_size or
265
                cp_off + cp_size > source_size or
266
                cp_size > size):
267
                break
268
            memcpy(out, source + cp_off, cp_size)
269
            out = out + cp_size
270
            size -= cp_size
271
        elif (cmd):
272
            if (cmd > size):
273
                break
274
            memcpy(out, data, cmd)
275
            out = out + cmd
276
            data = data + cmd
277
            size -= cmd
278
        else:
279
            # /*
280
            #  * cmd == 0 is reserved for future encoding
281
            #  * extensions. In the mean time we must fail when
282
            #  * encountering them (might be data corruption).
283
            #  */
284
            ## /* XXX: error("unexpected delta opcode 0"); */
0.18.17 by John Arbash Meinel
We now build the appropriate hash table entries.
285
            return None
286
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
287
    # /* sanity check */
288
    if (data != top or size != 0):
289
        ## /* XXX: error("delta replay has gone wild"); */
290
        return None
291
292
    # *dst_size = out - dst_buf;
293
    assert (out - dst_buf) == PyString_GET_SIZE(result)
294
    return result
0.23.7 by John Arbash Meinel
Add a apply_delta2 function, just in case it matters.
295
296
297
def apply_delta2(source_bytes, delta_bytes):
298
    """Apply a delta generated by make_delta to source_bytes."""
299
    # This defers to the patch-delta code rather than implementing it here
300
    # If this is faster, we can bring the memory allocation and error handling
301
    # into apply_delta(), and leave the primary loop in a separate C func.
302
    cdef char *source, *delta, *target
303
    cdef Py_ssize_t source_size, delta_size
304
    cdef unsigned long target_size
305
306
    if not PyString_CheckExact(source_bytes):
307
        raise TypeError('source is not a str')
308
    if not PyString_CheckExact(delta_bytes):
309
        raise TypeError('delta is not a str')
310
311
    source = PyString_AS_STRING(source_bytes)
312
    source_size = PyString_GET_SIZE(source_bytes)
313
    delta = PyString_AS_STRING(delta_bytes)
314
    delta_size = PyString_GET_SIZE(delta_bytes)
315
316
    target = <char *>patch_delta(source, source_size,
317
                                 delta, delta_size,
318
                                 &target_size)
319
    if target == NULL:
320
        return None
321
    result = PyString_FromStringAndSize(target, target_size)
322
    free(target)
323
    return result