~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_rio_pyx.pyx

Committer: Jelmer Vernooij
Date: 2009-05-14 15:42:42 UTC
mto: (4290.1.9 rio-serializer2)
mto: This revision was merged to the branch mainline in revision 4368.
Revision ID: jelmer@samba.org-20090514154242-n8bquw2crer2yur0

More work using C APIs rather than Python objects.

files modified:
bzrlib/_rio_pyx.pyx

Show diffs side-by-side

added added

removed removed

bzrlib/_rio_pyx.pyx

cdef extern from "Python.h":

ctypedef int Py_ssize_t # Required for older pyrex versions

struct _PyObject:

pass

ctypedef _PyObject PyObject

char *PyString_AS_STRING(object s)

Py_ssize_t PyString_GET_SIZE(object t)

Py_ssize_t PyString_GET_SIZE(object t) except -1

object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)

Py_ssize_t PyUnicode_GET_SIZE(object t) except -1

int PyUnicode_Resize(PyObject **o, Py_ssize_t size) except -1

object PyString_FromStringAndSize(char *s, Py_ssize_t len)

int PyString_CheckExact(object)

int PyUnicode_CheckExact(object)

void Py_INCREF(object)

void Py_DECREF(object)

object PyList_GetItem(object, int)

int PyList_SetItem(object, int, object) except -1

int PyList_Size(object) except -1

object PyUnicode_Join(object, object)

object PyUnicode_AsASCIIString(object)

cdef extern from "ctype.h":

int isalnum(char c)

cdef extern from "string.h":

char *strstr(char *a, char *b)

int strcmp(char *a, char *b)

from bzrlib.rio import Stanza

def _valid_tag(tag):

cdef char *c_tag

cdef int c_len

cdef Py_ssize_t c_len

cdef int i

c_tag = PyString_AS_STRING(tag)

c_len = PyString_GET_SIZE(tag)

for i from 0 <= i < c_len:

return False

return True

cdef object _join_utf8_strip(object entries):

cdef PyObject *c_ret

cdef Py_ssize_t size

entries[-1] = entries[-1][:-1]

return PyUnicode_Join(unicode(""), entries)

def _read_stanza_utf8(line_iter):

cdef char *c_line, *colon

cdef Py_ssize_t c_len

pairs = []

tag = None

accum_value = []

# TODO: jam 20060922 This code should raise real errors rather than

# using 'assert' to process user input, or raising ValueError

# rather than a more specific error.

for line in line_iter:

if line is None or line == '':

if line is None:

break # end of file

if not PyString_CheckExact(line):

raise TypeError("%r is not a line" % line)

c_line = PyString_AS_STRING(line)

c_len = PyString_GET_SIZE(line)

if strcmp(c_line, "") == 0:

break # end of file

if line == '\n':

if strcmp(c_line, "\n") == 0:

break # end of stanza

if line[0] == '\t': # continues previous value

if c_line[0] == c'\t': # continues previous value

if tag is None:

raise ValueError('invalid continuation line %r' % line)

accum_value.append('\n' + line[1:-1])

new_value = PyUnicode_DecodeUTF8(c_line+1, c_len-1, "strict")

else: # new tag:value line

if tag is not None:

pairs.append((tag, ''.join(accum_value).decode('utf-8')))

try:

colon_index = line.index(': ')

except ValueError:

100

pairs.append((tag, _join_utf8_strip(accum_value)))

101

colon = <char *>strstr(c_line, ": ")

102

if colon == NULL:

103

raise ValueError('tag/value separator not found in line %r'

104

% line)

tag = line[:colon_index]

#if not _valid_tag(tag):

# raise ValueError("invalid rio tag %r" % (tag,))

accum_value = [line[colon_index+2:-1]]

105

tag = PyString_FromStringAndSize(c_line, colon-c_line)

106

if not _valid_tag(tag):

107

raise ValueError("invalid rio tag %r" % (tag,))

108

accum_value = []

109

new_value = PyUnicode_DecodeUTF8(colon+2, c_len-(colon-c_line+2),

110

"strict")

111

accum_value.append(new_value)

112

if tag is not None: # add last tag-value

pairs.append((tag, ''.join(accum_value).decode('utf-8')))

113

pairs.append((tag, _join_utf8_strip(accum_value)))

114

return Stanza.from_pairs(pairs)

115

else: # didn't see any content

116

return None

117

118

119

def _read_stanza_unicode(unicode_iter):

120

cdef int colon_index

121

pairs = []

122

tag = None

accum_value = None

123

accum_value = []

124

125

# TODO: jam 20060922 This code should raise real errors rather than

126

# using 'assert' to process user input, or raising ValueError

127

# rather than a more specific error.

128

for line in unicode_iter:

if line is None or line == '':

129

if line is None or line == unicode(''):

130

break # end of file

if line == '\n':

131

if line == unicode('\n'):

132

break # end of stanza

real_l = line

if line[0] == '\t': # continues previous value

133

if line[0] == unicode('\t'): # continues previous value

134

if tag is None:

raise ValueError('invalid continuation line %r' % real_l)

accum_value += '\n' + line[1:-1]

135

raise ValueError('invalid continuation line %r' % line)

136

accum_value.append(line[1:])

137

else: # new tag:value line

100

138

if tag is not None:

101

pairs.append((tag, accum_value))

139

pairs.append((tag, PyUnicode_Join(unicode(""), accum_value[:-1])))

102

140

try:

103

colon_index = line.index(': ')

141

colon_index = line.index(unicode(': '))

104

142

except ValueError:

105

143

raise ValueError('tag/value separator not found in line %r'

106

% real_l)

107

tag = str(line[:colon_index])

108

#if not _valid_tag(tag):

109

# raise ValueError("invalid rio tag %r" % (tag,))

110

accum_value = line[colon_index+2:-1]

144

% line)

145

tag = PyUnicode_AsASCIIString(line[0:colon_index])

146

if not _valid_tag(tag):

147

raise ValueError("invalid rio tag %r" % (tag,))

148

accum_value = [line[colon_index+2:]]

111

149

112

150

if tag is not None: # add last tag-value

113

pairs.append((tag, accum_value))

151

pairs.append((tag, PyUnicode_Join(unicode(""), accum_value[:-1])))

114

152

return Stanza.from_pairs(pairs)

115

153

else: # didn't see any content

116

154

return None

Older »