1
# Copyright (C) 2007,2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Pyrex implementation for bencode coder/decoder"""
20
cdef extern from "stddef.h":
21
ctypedef unsigned int size_t
23
cdef extern from "Python.h":
24
ctypedef int Py_ssize_t
25
int PyInt_CheckExact(object o)
26
int PyLong_CheckExact(object o)
27
int PyString_CheckExact(object o)
28
int PyTuple_CheckExact(object o)
29
int PyList_CheckExact(object o)
30
int PyDict_CheckExact(object o)
31
int PyBool_Check(object o)
32
object PyString_FromStringAndSize(char *v, Py_ssize_t len)
33
char *PyString_AS_STRING(object o) except NULL
34
Py_ssize_t PyString_GET_SIZE(object o) except -1
35
object PyInt_FromString(char *str, char **pend, int base)
36
int Py_GetRecursionLimit()
37
int Py_EnterRecursiveCall(char *)
38
void Py_LeaveRecursiveCall()
40
int PyList_Append(object, object) except -1
42
cdef extern from "stdlib.h":
43
void free(void *memblock)
44
void *malloc(size_t size)
45
void *realloc(void *memblock, size_t size)
46
long strtol(char *, char **, int)
48
cdef extern from "string.h":
49
void *memcpy(void *dest, void *src, size_t count)
51
cdef extern from "python-compat.h":
52
int snprintf(char* buffer, size_t nsize, char* fmt, ...)
57
cdef extern from "_bencode_pyx.h":
58
void D_UPDATE_TAIL(Decoder, int n)
59
void E_UPDATE_TAIL(Encoder, int n)
65
cdef readonly char *tail
66
cdef readonly int size
67
cdef readonly int _yield_tuples
70
def __init__(self, s, yield_tuples=0):
71
"""Initialize decoder engine.
72
@param s: Python string.
74
if not PyString_CheckExact(s):
75
raise TypeError("String required")
78
self.tail = PyString_AS_STRING(s)
79
self.size = PyString_GET_SIZE(s)
80
self._yield_tuples = int(yield_tuples)
83
result = self._decode_object()
85
raise ValueError('junk in stream')
88
def decode_object(self):
    """Decode one bencoded object and return it.

    Python-visible entry point that delegates to the cdef method
    _decode_object.
    """
    decoded = self._decode_object()
    return decoded
91
cdef object _decode_object(self):
95
raise ValueError('stream underflow')
97
if Py_EnterRecursiveCall("_decode_object"):
98
raise RuntimeError("too deeply nested")
101
if c'0' <= ch <= c'9':
102
return self._decode_string()
104
D_UPDATE_TAIL(self, 1)
105
return self._decode_list()
107
D_UPDATE_TAIL(self, 1)
108
return self._decode_int()
110
D_UPDATE_TAIL(self, 1)
111
return self._decode_dict()
113
raise ValueError('unknown object type identifier %r' % ch)
115
Py_LeaveRecursiveCall()
117
cdef int _read_digits(self, char stop_char) except -1:
# Scan the run of digit / '-' characters at self.tail and require that it
# is immediately followed by stop_char; raises ValueError otherwise.
120
# NOTE(review): self.tail[i] is read before the `i < self.size` bound is
# tested, so the byte at index self.size is examined. Presumably that is
# the source string's trailing NUL and therefore readable -- confirm, or
# test the bound first.
# NOTE(review): '-' is accepted at any position in the run, not only as
# the first character.
while ((self.tail[i] >= c'0' and self.tail[i] <= c'9') or
121
self.tail[i] == c'-') and i < self.size:
124
if self.tail[i] != stop_char:
125
raise ValueError("Stop character %c not found: %c" %
126
(stop_char, self.tail[i]))
127
# Leading-zero check for values that start with '0' or '-0'.
if (self.tail[0] == c'0' or
128
(self.tail[0] == c'-' and self.tail[1] == c'0')):
132
raise ValueError # leading zeroes are not allowed
135
cdef object _decode_int(self):
137
i = self._read_digits(c'e')
140
ret = PyInt_FromString(self.tail, NULL, 10)
143
D_UPDATE_TAIL(self, i+1)
146
cdef object _decode_string(self):
# Decode a length-prefixed string of the form "<length>:<bytes>" starting
# at self.tail, advancing the tail past the prefix and the payload.
149
# strtol allows leading whitespace, negatives, and leading zeros
150
# however, all callers have already checked that '0' <= tail[0] <= '9'
151
# or they wouldn't have called _decode_string
152
# strtol will stop at trailing whitespace, etc
153
# NOTE(review): strtol is not given self.size, so it is not bounded by the
# remaining stream length; this presumably relies on the underlying Python
# string being NUL-terminated -- confirm.
n = strtol(self.tail, &next_tail, 10)
154
if next_tail == NULL or next_tail[0] != c':':
155
raise ValueError('string len not terminated by ":"')
156
# strtol allows leading zeros, so validate that we don't have that
157
# (a lone "0" immediately followed by ':' is the only accepted form that
# starts with '0')
if (self.tail[0] == c'0'
158
and (n != 0 or (next_tail - self.tail != 1))):
159
raise ValueError('leading zeros are not allowed')
160
# Skip the length digits plus the ':' separator.
D_UPDATE_TAIL(self, next_tail - self.tail + 1)
164
raise ValueError('stream underflow')
166
raise ValueError('string size below zero: %d' % n)
168
# Copy n payload bytes into a new Python string, then step past them.
result = PyString_FromStringAndSize(self.tail, n)
169
D_UPDATE_TAIL(self, n)
172
cdef object _decode_list(self):
176
if self.tail[0] == c'e':
177
D_UPDATE_TAIL(self, 1)
178
if self._yield_tuples:
183
# As a quick shortcut, check to see if the next object is a
184
# string, since we know that won't be creating recursion
185
# if self.tail[0] >= c'0' and self.tail[0] <= c'9':
186
PyList_Append(result, self._decode_object())
188
raise ValueError('malformed list')
190
cdef object _decode_dict(self):
199
D_UPDATE_TAIL(self, 1)
202
# keys should be strings only
203
if self.tail[0] < c'0' or self.tail[0] > c'9':
204
raise ValueError('key was not a simple string.')
205
key = self._decode_string()
207
raise ValueError('dict keys disordered')
210
value = self._decode_object()
213
raise ValueError('malformed dict')
216
def bdecode(object s):
    """Decode the bencoded string s into a Python object."""
    decoder = Decoder(s)
    return decoder.decode()
221
def bdecode_as_tuple(object s):
    """Decode the bencoded string s into a Python object.

    The decoder is created with its yield_tuples argument set to True,
    so tuples are used rather than lists.
    """
    decoder = Decoder(s, True)
    return decoder.decode()
226
class Bencached(object):
227
__slots__ = ['bencoded']
229
def __init__(self, s):
234
INITSIZE = 1024 # initial size for encoder buffer
239
"""Bencode encoder"""
241
cdef readonly char *tail
242
cdef readonly int size
243
cdef readonly char *buffer
244
cdef readonly int maxsize
246
def __init__(self, int maxsize=INITSIZE):
247
"""Initialize encoder engine
248
@param maxsize: initial size of internal char buffer
256
p = <char*>malloc(maxsize)
258
raise MemoryError('Not enough memory to allocate buffer '
261
self.maxsize = maxsize
270
if self.buffer != NULL and self.size != 0:
271
return PyString_FromStringAndSize(self.buffer, self.size)
275
cdef int _ensure_buffer(self, int required) except 0:
276
"""Ensure that tail of CharTail buffer has enough size.
277
If buffer is not big enough then function try to
280
cdef char *new_buffer
283
# Check whether the current allocation already has room for `required`
# more bytes beyond the self.size bytes in use.
if self.size + required < self.maxsize:
286
# Otherwise grow by doubling the capacity until the request fits.
# NOTE(review): if self.maxsize is 0 the doubling below never makes
# progress (0 * 2 == 0) and the loop cannot terminate -- check whether
# __init__ rejects a zero maxsize.
# NOTE(review): size, maxsize and required are C ints, so the additions
# and the doubling can overflow for very large buffers -- confirm limits.
new_size = self.maxsize
287
while new_size < self.size + required:
288
new_size = new_size * 2
289
# self.buffer is only overwritten after the NULL check, so a failed
# realloc leaves the existing buffer pointer in place.
new_buffer = <char*>realloc(self.buffer, <size_t>new_size)
290
if new_buffer == NULL:
291
raise MemoryError('Cannot realloc buffer for encoder')
293
self.buffer = new_buffer
294
self.maxsize = new_size
295
# realloc may have moved the block, so re-derive tail from the new base.
self.tail = &new_buffer[self.size]
298
cdef int _encode_int(self, int x) except 0:
299
"""Encode int to bencode string iNNNe
300
@param x: value to encode
303
self._ensure_buffer(INT_BUF_SIZE)
304
n = snprintf(self.tail, INT_BUF_SIZE, "i%de", x)
306
raise MemoryError('int %d too big to encode' % x)
307
E_UPDATE_TAIL(self, n)
310
cdef int _encode_long(self, x) except 0:
    """Append x in bencode integer form, 'i' + str(x) + 'e'.

    The text is built as a Python string and handed to _append_string,
    whose result is returned.
    """
    encoded = 'i' + str(x) + 'e'
    return self._append_string(encoded)
313
cdef int _append_string(self, s) except 0:
315
n = PyString_GET_SIZE(s)
316
self._ensure_buffer(n)
317
memcpy(self.tail, PyString_AS_STRING(s), n)
318
E_UPDATE_TAIL(self, n)
321
cdef int _encode_string(self, x) except 0:
# Append the string x to the buffer in the form "<length>:<bytes>".
323
cdef Py_ssize_t x_len
324
x_len = PyString_GET_SIZE(x)
325
# Reserve room for the payload plus up to INT_BUF_SIZE bytes of prefix.
# NOTE(review): _ensure_buffer takes `int required`, so a Py_ssize_t sum
# larger than an int would be narrowed here -- confirm acceptable limits.
self._ensure_buffer(x_len + INT_BUF_SIZE)
326
# NOTE(review): x_len is a Py_ssize_t but is formatted with '%d', which
# expects an int; where Py_ssize_t is wider than int this is a varargs
# type mismatch -- consider a cast or a matching format. TODO confirm.
n = snprintf(self.tail, INT_BUF_SIZE, '%d:', x_len)
328
raise MemoryError('string %s too big to encode' % x)
329
# The payload is copied immediately after the n-byte length prefix.
memcpy(<void *>(self.tail+n), PyString_AS_STRING(x), x_len)
330
E_UPDATE_TAIL(self, n + x_len)
333
cdef int _encode_list(self, x) except 0:
334
self._ensure_buffer(1)
336
E_UPDATE_TAIL(self, 1)
341
self._ensure_buffer(1)
343
E_UPDATE_TAIL(self, 1)
346
cdef int _encode_dict(self, x) except 0:
347
self._ensure_buffer(1)
349
E_UPDATE_TAIL(self, 1)
354
if not PyString_CheckExact(k):
355
raise TypeError('key in dict should be string')
356
self._encode_string(k)
359
self._ensure_buffer(1)
361
E_UPDATE_TAIL(self, 1)
364
def process(self, object x):
365
if Py_EnterRecursiveCall("encode"):
366
raise RuntimeError("too deeply nested")
368
if PyString_CheckExact(x):
369
self._encode_string(x)
370
elif PyInt_CheckExact(x):
372
elif PyLong_CheckExact(x):
374
elif PyList_CheckExact(x) or PyTuple_CheckExact(x):
376
elif PyDict_CheckExact(x):
378
elif PyBool_Check(x):
379
self._encode_int(int(x))
380
elif isinstance(x, Bencached):
381
self._append_string(x.bencoded)
383
raise TypeError('unsupported type %r' % x)
385
Py_LeaveRecursiveCall()
389
"""Encode Python object x to string"""