32
30
object PyString_FromStringAndSize(char *v, Py_ssize_t len)
33
31
char *PyString_AS_STRING(object o) except NULL
34
32
Py_ssize_t PyString_GET_SIZE(object o) except -1
35
object PyInt_FromString(char *str, char **pend, int base)
36
int Py_GetRecursionLimit()
37
int Py_EnterRecursiveCall(char *)
38
void Py_LeaveRecursiveCall()
34
object PyLong_FromLong(unsigned long)
40
int PyList_Append(object, object) except -1
36
# Minimal declaration of size_t so that the extern prototypes in this file
# (malloc, realloc, memcpy, snprintf) can name it.
cdef extern from "stddef.h":
37
# NOTE(review): size_t is wider than unsigned int on common 64-bit ABIs.
# Presumably this ctypedef is only an approximation for the Pyrex compiler
# and the real definition comes from the C header -- confirm.
ctypedef unsigned int size_t
42
39
cdef extern from "stdlib.h":
    # C heap management: malloc()/realloc() hand back untyped memory and
    # free() releases it.
    void free(void *memblock)
    void *malloc(size_t size)
    void *realloc(void *memblock, size_t size)
    # Parse a signed integer in the given base starting at nptr; *endptr
    # receives the address of the first character that was not consumed.
    long strtol(char *nptr, char **endptr, int base)
48
46
cdef extern from "string.h":
    # Copy count bytes from src to dest; the two regions must not overlap.
    void *memcpy(void *dest, void *src, size_t count)
    # Find the first occurrence of byte c within the first n bytes of s, or
    # return NULL when it is absent.  The argument order follows the C
    # prototype -- (buffer, byte, length) -- so the byte to search for comes
    # BEFORE the length.  The return type stays char * because callers in
    # this file assign the result directly to a char * variable.
    # NOTE(review): the call in _decode_int_until passes
    # (self.tail, self.size, stop_char), i.e. length and byte in the opposite
    # order to this prototype -- verify that call site.
    char *memchr(void *s, int c, size_t n)
51
50
# snprintf is declared via the "python-compat.h" header rather than a
# standard C header; presumably that header supplies a portable snprintf on
# platforms that lack one -- confirm against the header.
cdef extern from "python-compat.h":
52
51
# Bounded printf-style formatting into buffer.  Within this file the return
# value is used as the number of characters by which the encoder tail is
# advanced after formatting.
int snprintf(char* buffer, size_t nsize, char* fmt, ...)
57
# Tail-advancing helpers declared from the extension's companion C header.
cdef extern from "_bencode_pyx.h":
58
# Called by Decoder methods after n characters of input have been consumed.
# Presumably equivalent to Decoder._update_tail (tail advances by n, size
# shrinks by n) -- confirm against the header.
void D_UPDATE_TAIL(Decoder, int n)
59
# Called by Encoder methods after n characters have been written at tail.
# Presumably equivalent to Encoder._update_tail (tail advances by n, size
# grows by n) -- confirm against the header.
void E_UPDATE_TAIL(Encoder, int n)
61
# To maintain compatibility with older versions of pyrex, we have to use the
62
# relative import here, rather than 'bzrlib._static_tuple_c'
63
from _static_tuple_c cimport StaticTuple, StaticTuple_CheckExact, \
66
import_static_tuple_c()
69
54
cdef class Decoder:
70
55
"""Bencode decoder"""
72
57
cdef readonly char *tail
73
cdef readonly int size
74
cdef readonly int _yield_tuples
58
cdef readonly int size
60
cdef readonly long _MAXINT
61
cdef readonly int _MAXN
62
cdef readonly object _longint
63
cdef readonly int _yield_tuples
77
65
def __init__(self, s, yield_tuples=0):
78
66
"""Initialize decoder engine.
81
69
if not PyString_CheckExact(s):
82
70
raise TypeError("String required")
85
72
self.tail = PyString_AS_STRING(s)
86
73
self.size = PyString_GET_SIZE(s)
87
74
self._yield_tuples = int(yield_tuples)
76
self._MAXINT = PyInt_GetMax()
77
self._MAXN = len(str(self._MAXINT))
78
self._longint = long(0)
90
result = self._decode_object()
81
result = self.decode_object()
92
83
raise ValueError('junk in stream')
95
86
def decode_object(self):
    """Python-callable entry point for decoding one object.

    Delegates to the C-level _decode_object() method and returns whatever
    that method returns; any exception it raises propagates unchanged.
    """
    decoded = self._decode_object()
    return decoded
98
cdef object _decode_object(self):
101
89
if 0 == self.size:
102
90
raise ValueError('stream underflow')
104
if Py_EnterRecursiveCall("_decode_object"):
105
raise RuntimeError("too deeply nested")
108
if c'0' <= ch <= c'9':
109
return self._decode_string()
111
D_UPDATE_TAIL(self, 1)
112
return self._decode_list()
114
D_UPDATE_TAIL(self, 1)
115
return self._decode_int()
117
D_UPDATE_TAIL(self, 1)
118
return self._decode_dict()
120
raise ValueError('unknown object type identifier %r' % ch)
122
Py_LeaveRecursiveCall()
124
# Scan the integer text at the head of the buffer and validate it.
# NOTE(review): only part of this method is visible in this chunk (the
# declaration and initialisation of i, the loop body and the return are not
# shown), so the notes below cover just the visible statements.
cdef int _read_digits(self, char stop_char) except -1:
127
# Walk over characters that are ASCII digits or '-'.
# NOTE(review): self.tail[i] is read before the i < self.size bound is
# tested, so the byte at index size is inspected once the input is exhausted
# -- confirm the buffer is always readable there (e.g. NUL-terminated).
while ((self.tail[i] >= c'0' and self.tail[i] <= c'9') or
128
self.tail[i] == c'-') and i < self.size:
131
# The character at index i must be stop_char, otherwise ValueError is raised
# naming both the expected and the actual character.
if self.tail[i] != stop_char:
132
raise ValueError("Stop character %c not found: %c" %
133
(stop_char, self.tail[i]))
134
# Leading-zero check on the first one or two characters.  The lines between
# this condition and the raise below are not visible here, so the exact rule
# (which inputs beginning with '0' are still accepted) cannot be stated.
if (self.tail[0] == c'0' or
135
(self.tail[0] == c'-' and self.tail[1] == c'0')):
139
raise ValueError # leading zeroes are not allowed
96
return self._decode_int()
97
elif c'0' <= ch <= c'9':
98
return self._decode_string()
101
return self._decode_list()
104
return self._decode_dict()
106
raise ValueError('unknown object type identifier %r' % ch)
108
cdef void _update_tail(self, int n):
    """Consume n characters: move tail forward by n and reduce size by n."""
    # The two updates are independent of each other, so order is irrelevant.
    self.tail = self.tail + n
    self.size = self.size - n
142
113
cdef object _decode_int(self):
144
i = self._read_digits(c'e')
147
ret = PyInt_FromString(self.tail, NULL, 10)
150
D_UPDATE_TAIL(self, i+1)
115
result = self._decode_int_until(c'e')
116
if result != self._MAXINT:
121
cdef int _decode_int_until(self, char stop_char) except? -1:
122
"""Decode int from stream until stop_char encountered"""
123
cdef char *actual_tail, *expected_tail
125
expected_tail = memchr(self.tail, self.size, stop_char)
126
if expected_tail == NULL:
128
ret = PyLong_FromLong(strtol(self.tail, &actual_tail, 10))
129
if actual_tail != expected_tail or actual_tail == self.tail:
131
self._update_tail(actual_tail - self.tail)
153
134
cdef object _decode_string(self):
156
# strtol allows leading whitespace, negatives, and leading zeros
157
# however, all callers have already checked that '0' <= tail[0] <= '9'
158
# or they wouldn't have called _decode_string
159
# strtol will stop at trailing whitespace, etc
160
n = strtol(self.tail, &next_tail, 10)
161
if next_tail == NULL or next_tail[0] != c':':
162
raise ValueError('string len not terminated by ":"')
163
# strtol allows leading zeros, so validate that we don't have that
164
if (self.tail[0] == c'0'
165
and (n != 0 or (next_tail - self.tail != 1))):
166
raise ValueError('leading zeros are not allowed')
167
D_UPDATE_TAIL(self, next_tail - self.tail + 1)
137
n = self._decode_int_until(c':')
140
if n == self._MAXINT:
141
# strings longer than 1GB are not supported
142
raise ValueError('too long string')
170
143
if n > self.size:
171
144
raise ValueError('stream underflow')
173
146
raise ValueError('string size below zero: %d' % n)
175
148
result = PyString_FromStringAndSize(self.tail, n)
176
D_UPDATE_TAIL(self, n)
179
152
cdef object _decode_list(self):
302
271
self.tail = &new_buffer[self.size]
274
cdef void _update_tail(self, int n):
    """Account for n written characters: move tail forward, grow size by n."""
    # The two updates are independent of each other, so order is irrelevant.
    self.tail = self.tail + n
    self.size = self.size + n
305
279
cdef int _encode_int(self, int x) except 0:
306
280
"""Encode int to bencode string iNNNe
307
281
@param x: value to encode
310
self._ensure_buffer(INT_BUF_SIZE)
311
n = snprintf(self.tail, INT_BUF_SIZE, "i%de", x)
284
self._ensure_buffer(32)
285
n = snprintf(self.tail, 32, "i%de", x)
313
287
raise MemoryError('int %d too big to encode' % x)
314
E_UPDATE_TAIL(self, n)
317
291
cdef int _encode_long(self, x) except 0:
    """Append x to the output as 'i' + str(x) + 'e'.

    Returns the result of _append_string(); a return value of 0 signals an
    exception, as declared by the ``except 0`` clause.
    """
    encoded = ''.join(['i', str(x), 'e'])
    return self._append_string(encoded)
320
294
cdef int _append_string(self, s) except 0:
322
n = PyString_GET_SIZE(s)
323
self._ensure_buffer(n)
324
memcpy(self.tail, PyString_AS_STRING(s), n)
325
E_UPDATE_TAIL(self, n)
295
self._ensure_buffer(PyString_GET_SIZE(s))
296
memcpy(self.tail, PyString_AS_STRING(s), PyString_GET_SIZE(s))
297
self._update_tail(PyString_GET_SIZE(s))
328
300
cdef int _encode_string(self, x) except 0:
330
cdef Py_ssize_t x_len
331
x_len = PyString_GET_SIZE(x)
332
self._ensure_buffer(x_len + INT_BUF_SIZE)
333
n = snprintf(self.tail, INT_BUF_SIZE, '%d:', x_len)
302
self._ensure_buffer(PyString_GET_SIZE(x) + 32)
303
n = snprintf(self.tail, 32, '%d:', PyString_GET_SIZE(x))
335
305
raise MemoryError('string %s too big to encode' % x)
336
memcpy(<void *>(self.tail+n), PyString_AS_STRING(x), x_len)
337
E_UPDATE_TAIL(self, n + x_len)
306
memcpy(<void *>self.tail+n, PyString_AS_STRING(x),
307
PyString_GET_SIZE(x))
308
self._update_tail(n+PyString_GET_SIZE(x))
340
311
cdef int _encode_list(self, x) except 0:
341
self._ensure_buffer(1)
312
self._ensure_buffer(2)
342
313
self.tail[0] = c'l'
343
E_UPDATE_TAIL(self, 1)
348
self._ensure_buffer(1)
349
319
self.tail[0] = c'e'
350
E_UPDATE_TAIL(self, 1)
353
323
cdef int _encode_dict(self, x) except 0:
354
self._ensure_buffer(1)
324
self._ensure_buffer(2)
355
325
self.tail[0] = c'd'
356
E_UPDATE_TAIL(self, 1)
363
333
self._encode_string(k)
364
334
self.process(x[k])
366
self._ensure_buffer(1)
367
336
self.tail[0] = c'e'
368
E_UPDATE_TAIL(self, 1)
371
340
def process(self, object x):
372
if Py_EnterRecursiveCall("encode"):
373
raise RuntimeError("too deeply nested")
375
if PyString_CheckExact(x):
376
self._encode_string(x)
377
elif PyInt_CheckExact(x):
379
elif PyLong_CheckExact(x):
381
elif (PyList_CheckExact(x) or PyTuple_CheckExact(x)
382
or StaticTuple_CheckExact(x)):
384
elif PyDict_CheckExact(x):
386
elif PyBool_Check(x):
387
self._encode_int(int(x))
388
elif isinstance(x, Bencached):
389
self._append_string(x.bencoded)
391
raise TypeError('unsupported type %r' % x)
393
Py_LeaveRecursiveCall()
341
if PyString_CheckExact(x):
342
self._encode_string(x)
343
elif PyInt_CheckExact(x):
345
elif PyLong_CheckExact(x):
347
elif PyList_CheckExact(x) or PyTuple_CheckExact(x):
349
elif PyDict_CheckExact(x):
351
elif PyBool_Check(x):
352
self._encode_int(int(x))
353
elif isinstance(x, Bencached):
354
self._append_string(x.bencoded)
356
raise TypeError('unsupported type %r' % x)