~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_rio_pyx.pyx

Committer: Jelmer Vernooij
Date: 2009-05-14 19:10:48 UTC
mto: (4290.1.9 rio-serializer2)
mto: This revision was merged to the branch mainline in revision 4368.
Revision ID: jelmer@samba.org-20090514191048-l5rbnuydsf2bgefc

Use shared data area when parsing pairs in stanza.

files modified:
bzrlib/_rio_pyx.pyx

Show diffs side-by-side

added added

removed removed

bzrlib/_rio_pyx.pyx

# No names are declared from this header; the empty extern block only makes
# the generated C file #include "python-compat.h".
# NOTE(review): presumably this supplies compatibility definitions for older
# Python versions -- confirm against the header itself.
cdef extern from "python-compat.h":
    pass

# C heap allocation routines used for the shared value accumulation buffer.
# They are declared by the standard <stdlib.h>; <malloc.h> is a non-standard
# header that is not available on every platform.
cdef extern from "stdlib.h":
    void *malloc(int)
    void *realloc(void *, int)
    void free(void *)

# Declarations of the CPython C-API names used by this module.
# NOTE(review): other C-API names used further down (PyString_AS_STRING,
# PyString_GET_SIZE, PyString_FromStringAndSize, PyUnicode_DecodeUTF8,
# PyUnicode_GET_SIZE, PyList_Append) are not declared in the lines visible
# here; presumably they are declared elsewhere in the file -- confirm.
cdef extern from "Python.h":
    ctypedef int Py_ssize_t # Required for older pyrex versions
    ctypedef int Py_UNICODE

    # Exact type checks
    int PyString_CheckExact(object)
    int PyUnicode_CheckExact(object)

    # Conversions between unicode objects, byte strings and raw buffers
    object PyUnicode_Join(object, object)
    object PyUnicode_AsASCIIString(object)
    object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
    Py_UNICODE *PyUnicode_AS_UNICODE(object)
    Py_UNICODE *PyUnicode_AsUnicode(object)

    # Character classification
    int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
    int Py_UNICODE_ISSPACE(Py_UNICODE)

    # Building from / copying raw Py_UNICODE buffers
    object PyUnicode_FromUnicode(Py_UNICODE *, int)
    void *Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, int)

from bzrlib.rio import Stanza

return False

return True

cdef object _join_unicode_strip(object entries):
    """Concatenate unicode fragments, dropping the final character.

    The last element of ``entries`` is replaced in place by a copy without
    its final character, so the caller's list is modified.

    :param entries: a non-empty list of unicode strings
    :return: the fragments joined with an empty separator
    """
    # TODO: This allocates a second string just to lose one character;
    # ideally the existing object would simply be shrunk by one.
    last_fragment = entries[-1]
    entries[-1] = last_fragment[:-1]
    empty_separator = unicode("")
    return PyUnicode_Join(empty_separator, entries)

cdef object _split_first_line_utf8(char *line, int len,
                                   Py_UNICODE *value, int *value_len):
    """Split a UTF-8 "tag: value" line at the first colon.

    The text after ": " is decoded as UTF-8 and copied into the caller's
    buffer rather than being returned as a new object.

    :param line: the raw bytes of the line
    :param len: number of bytes in ``line``
    :param value: output buffer receiving the decoded value; the caller must
        make sure it can hold at least ``len`` Py_UNICODE items (the decoded
        value never has more items than the encoded bytes)
    :param value_len: output; set to the number of items written to ``value``
    :return: the tag (the bytes before the colon) as a plain string
    :raises ValueError: if no colon is found, or the colon is not followed
        by a space
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] == c':':
            # Check the bound explicitly instead of relying on the byte
            # that follows the end of the line.
            if i + 1 >= len or line[i+1] != c' ':
                raise ValueError("invalid tag in line %r" % line)
            new_value = PyUnicode_DecodeUTF8(line+i+2, len-i-2, "strict")
            value_len[0] = PyUnicode_GET_SIZE(new_value)
            Py_UNICODE_COPY(value, PyUnicode_AS_UNICODE(new_value),
                            value_len[0])
            return PyString_FromStringAndSize(line, i)
    raise ValueError('tag/value separator not found in line %r' % line)

# The tag/value separator as a raw Py_UNICODE buffer, so that it can be
# compared against single characters of a line.
# PyUnicode_AsUnicode returns a pointer into the unicode object's own
# storage, so an owned reference to that object is kept at module level;
# otherwise the pointer could dangle once the temporary is released.
cdef object _colon_obj
cdef Py_UNICODE *colon
_colon_obj = unicode(":")
colon = PyUnicode_AsUnicode(_colon_obj)

cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
                                      Py_UNICODE *value, int *value_len):
    """Split a unicode "tag: value" line at the first colon.

    The text after the colon and the separator character that follows it is
    copied into the caller's buffer rather than being returned as a new
    object.

    :param line: the characters of the line
    :param len: number of characters in ``line``
    :param value: output buffer receiving the value; the caller must make
        sure it can hold at least ``len`` Py_UNICODE items
    :param value_len: output; set to the number of items written to ``value``
    :return: the tag (the text before the colon) encoded as ASCII
    :raises ValueError: if no colon is found, or the colon is not followed
        by a whitespace character
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] == colon[0]:
            # Check the bound explicitly instead of relying on the item
            # that follows the end of the line.
            if i + 1 >= len or not Py_UNICODE_ISSPACE(line[i+1]):
                raise ValueError("invalid tag in line %r" %
                                 PyUnicode_FromUnicode(line, len))
            # Pointer arithmetic on a Py_UNICODE * already advances in
            # units of Py_UNICODE, so the offset must not be multiplied by
            # sizeof(Py_UNICODE).
            Py_UNICODE_COPY(value, line+i+2, len-i-2)
            value_len[0] = len-i-2
            return PyUnicode_EncodeASCII(line, i, "strict")
    raise ValueError("tag/value separator not found in line %r" %
                     PyUnicode_FromUnicode(line, len))

103

100

105

def _read_stanza_utf8(line_iter):
    """Read one stanza from an iterable of UTF-8 encoded plain strings.

    Lines are consumed until the iterable is exhausted or yields None, an
    empty string, or a line consisting of a single newline. The value of
    the tag currently being read is accumulated in one heap buffer that is
    shared by all pairs of the stanza.

    :param line_iter: iterable yielding plain (byte) strings, one per line
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, value) pairs read,
        or None if no tag line was seen
    :raises TypeError: if an item is not exactly a plain string
    :raises ValueError: for a continuation line before any tag, a line
        without a valid "tag: " prefix, or a tag rejected by _valid_tag
    :raises MemoryError: if the accumulation buffer cannot be (re)allocated
    """
    cdef char *c_line
    cdef Py_ssize_t c_len
    cdef Py_UNICODE *accum_value, *new_accum
    cdef int accum_len, accum_size, new_size, new_len
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
    if accum_value == NULL:
        raise MemoryError
    try:
        # TODO: jam 20060922 This code should raise real errors rather than
        #       using 'assert' to process user input, or raising ValueError
        #       rather than a more specific error.
        for line in line_iter:
            if line is None:
                break # end of file
            if not PyString_CheckExact(line):
                raise TypeError("%r is not a plain string" % line)
            c_line = PyString_AS_STRING(line)
            c_len = PyString_GET_SIZE(line)
            if c_len < 1:
                break # end of file
            if c_len == 1 and c_line[0] == c"\n":
                break # end of stanza
            if accum_len + c_len > accum_size:
                # Keep growing until the line really fits: one growth step
                # is not enough for a line longer than half the buffer.
                new_size = accum_size
                while accum_len + c_len > new_size:
                    new_size = (new_size * 3) / 2
                # realloc leaves the old block valid on failure, so only
                # replace accum_value once it has succeeded; the finally
                # clause can then still free the old block.
                new_accum = <Py_UNICODE *>realloc(accum_value,
                    new_size*sizeof(Py_UNICODE))
                if new_accum == NULL:
                    raise MemoryError
                accum_value = new_accum
                accum_size = new_size
            if c_line[0] == c'\t': # continues previous value
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                new_value = PyUnicode_DecodeUTF8(c_line+1, c_len-1, "strict")
                # Append the decoded continuation to the value collected so
                # far; it has at most c_len-1 items, so it fits.
                new_len = PyUnicode_GET_SIZE(new_value)
                Py_UNICODE_COPY(accum_value+accum_len,
                                PyUnicode_AS_UNICODE(new_value), new_len)
                accum_len = accum_len + new_len
            else: # new tag:value line
                if tag is not None:
                    # accum_len-1 drops the last character of the value
                    # (the line terminator of its final line).
                    PyList_Append(pairs,
                        (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
                tag = _split_first_line_utf8(c_line, c_len, accum_value,
                                             &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            PyList_Append(pairs,
                (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
            return Stanza.from_pairs(pairs)
        else: # didn't see any content
            return None
    finally:
        free(accum_value)

138

158

139

159

140

160

def _read_stanza_unicode(unicode_iter):
    """Read one stanza from an iterable of unicode strings.

    Lines are consumed until the iterable is exhausted or yields None, an
    empty string, or a line starting with a line-break character. The value
    of the tag currently being read is accumulated in one heap buffer that
    is shared by all pairs of the stanza.

    :param unicode_iter: iterable yielding unicode strings, one per line
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, value) pairs read,
        or None if no tag line was seen
    :raises TypeError: if an item is not exactly a unicode string
    :raises ValueError: for a continuation line before any tag, a line
        without a valid tag prefix, or a tag rejected by _valid_tag
    :raises MemoryError: if the accumulation buffer cannot be (re)allocated
    """
    cdef Py_UNICODE *c_line
    cdef int c_len
    cdef Py_UNICODE *accum_value, *new_accum
    cdef int accum_len, accum_size, new_size
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
    if accum_value == NULL:
        raise MemoryError
    try:
        # TODO: jam 20060922 This code should raise real errors rather than
        #       using 'assert' to process user input, or raising ValueError
        #       rather than a more specific error.
        for line in unicode_iter:
            if line is None:
                break # end of file
            if not PyUnicode_CheckExact(line):
                raise TypeError("%r is not a unicode string" % line)
            c_line = PyUnicode_AS_UNICODE(line)
            c_len = PyUnicode_GET_SIZE(line)
            if c_len < 1:
                break # end of file
            if Py_UNICODE_ISLINEBREAK(c_line[0]):
                break # end of stanza
            if accum_len + c_len > accum_size:
                # Keep growing until the line really fits: one growth step
                # is not enough for a line longer than half the buffer.
                new_size = accum_size
                while accum_len + c_len > new_size:
                    new_size = (new_size * 3) / 2
                # realloc leaves the old block valid on failure, so only
                # replace accum_value once it has succeeded; the finally
                # clause can then still free the old block.
                new_accum = <Py_UNICODE *>realloc(accum_value,
                    new_size*sizeof(Py_UNICODE))
                if new_accum == NULL:
                    raise MemoryError
                accum_value = new_accum
                accum_size = new_size
            if Py_UNICODE_ISSPACE(c_line[0]): # continues previous value,
                                              # strictly speaking this should be \t
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Pointer arithmetic on a Py_UNICODE * already advances in
                # units of Py_UNICODE, so the offsets must not be
                # multiplied by sizeof(Py_UNICODE).
                Py_UNICODE_COPY(accum_value+accum_len, c_line+1, c_len-1)
                accum_len = accum_len + c_len - 1
            else: # new tag:value line
                if tag is not None:
                    # accum_len-1 drops the last character of the value
                    # (the line terminator of its final line).
                    PyList_Append(pairs,
                        (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
                tag = _split_first_line_unicode(c_line, c_len, accum_value,
                                                &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            PyList_Append(pairs,
                (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
            return Stanza.from_pairs(pairs)
        else: # didn't see any content
            return None
    finally:
        free(accum_value)

Older »