65
cdef object _join_unicode_strip(object entries):
    """Concatenate unicode fragments, dropping the final character.

    The last element of ``entries`` is replaced *in place* by a copy of
    itself without its trailing character; all elements are then joined
    with an empty unicode separator and the result is returned.

    :param entries: a mutable sequence of unicode strings; it must support
        negative indexing and item assignment, and it is modified.
    :return: the joined unicode string.
    """
    # TODO: Slicing builds a brand-new object that merely lacks the last
    #       character. Ideally the existing object would be resized by -1.
    trimmed_tail = entries[-1][:-1]
    entries[-1] = trimmed_tail
    empty_separator = unicode("")
    return PyUnicode_Join(empty_separator, entries)
73
cdef object _split_first_line_utf8(char *line, int len):
71
cdef object _split_first_line_utf8(char *line, int len,
72
Py_UNICODE *value, int *value_len):
75
74
for i from 0 <= i < len:
76
75
if line[i] == c':':
77
76
if line[i+1] != c' ':
78
77
raise ValueError("invalid tag in line %r" % line)
79
return (PyString_FromStringAndSize(line, i),
80
PyUnicode_DecodeUTF8(line+i+2, len-i-2, "strict"))
78
new_value = PyUnicode_DecodeUTF8(line+i+2, len-i-2, "strict")
79
value_len[0] = PyUnicode_GET_SIZE(new_value)
80
Py_UNICODE_COPY(value, PyUnicode_AS_UNICODE(new_value),
82
return PyString_FromStringAndSize(line, i)
81
83
raise ValueError('tag/value separator not found in line %r' % line)
84
86
# Module-level Py_UNICODE buffer holding the ':' tag/value separator; the
# unicode line splitter compares characters against colon[0].
#
# PyUnicode_AsUnicode returns a pointer into the unicode object's internal
# buffer, so that object has to stay alive for as long as the pointer is
# used. Taking the pointer from a temporary (as in
# PyUnicode_AsUnicode(unicode(":"))) leaves it valid only if the interpreter
# happens to cache that object. Keep an explicit module-level reference so
# the buffer is guaranteed to outlive every use of `colon`.
cdef object _colon_ref
cdef Py_UNICODE *colon
_colon_ref = unicode(":")
colon = PyUnicode_AsUnicode(_colon_ref)
87
cdef object _split_first_line_unicode(Py_UNICODE *line, int len):
89
cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
90
Py_UNICODE *value, int *value_len):
89
92
for i from 0 <= i < len:
90
93
if line[i] == colon[0]:
91
94
if not Py_UNICODE_ISSPACE(line[i+1]):
92
95
raise ValueError("invalid tag in line %r" %
93
96
PyUnicode_FromUnicode(line, len))
94
return (PyUnicode_EncodeASCII(line, i, "strict"),
95
PyUnicode_FromUnicode(line+i+2, len-i-2))
97
Py_UNICODE_COPY(value, line+(i+2) * sizeof(Py_UNICODE),
99
value_len[0] = len-i-2
100
return PyUnicode_EncodeASCII(line, i, "strict")
96
101
raise ValueError("tag/value separator not found in line %r" %
97
102
PyUnicode_FromUnicode(line, len))
100
105
def _read_stanza_utf8(line_iter):
101
106
cdef char *c_line
102
107
cdef Py_ssize_t c_len
108
cdef Py_UNICODE *accum_value
109
cdef int accum_len, accum_size
107
# TODO: jam 20060922 This code should raise real errors rather than
108
# using 'assert' to process user input, or raising ValueError
109
# rather than a more specific error.
110
for line in line_iter:
113
if not PyString_CheckExact(line):
114
raise TypeError("%r is not a plain string" % line)
115
c_line = PyString_AS_STRING(line)
116
c_len = PyString_GET_SIZE(line)
119
if c_len == 1 and c_line[0] == c"\n":
120
break # end of stanza
121
if c_line[0] == c'\t': # continues previous value
123
raise ValueError('invalid continuation line %r' % line)
124
new_value = PyUnicode_DecodeUTF8(c_line+1, c_len-1, "strict")
125
else: # new tag:value line
127
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
129
(tag, new_value) = _split_first_line_utf8(c_line, c_len)
130
if not _valid_tag(tag):
131
raise ValueError("invalid rio tag %r" % (tag,))
132
accum_value.append(new_value)
133
if tag is not None: # add last tag-value
134
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
135
return Stanza.from_pairs(pairs)
136
else: # didn't see any content
114
accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
115
if accum_value == NULL:
118
# TODO: jam 20060922 This code should raise real errors rather than
119
# using 'assert' to process user input, or raising ValueError
120
# rather than a more specific error.
121
for line in line_iter:
124
if not PyString_CheckExact(line):
125
raise TypeError("%r is not a plain string" % line)
126
c_line = PyString_AS_STRING(line)
127
c_len = PyString_GET_SIZE(line)
130
if c_len == 1 and c_line[0] == c"\n":
131
break # end of stanza
132
if accum_len + c_len > accum_size:
133
accum_size = (accum_size * 3) / 2
134
accum_value = <Py_UNICODE *>realloc(accum_value,
135
accum_size*sizeof(Py_UNICODE))
136
if accum_value == NULL:
138
if c_line[0] == c'\t': # continues previous value
140
raise ValueError('invalid continuation line %r' % line)
141
new_value = PyUnicode_DecodeUTF8(c_line+1, c_len-1, "strict")
142
else: # new tag:value line
145
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
146
tag = _split_first_line_utf8(c_line, c_len, accum_value,
148
if not _valid_tag(tag):
149
raise ValueError("invalid rio tag %r" % (tag,))
150
if tag is not None: # add last tag-value
152
(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
153
return Stanza.from_pairs(pairs)
154
else: # didn't see any content
140
160
def _read_stanza_unicode(unicode_iter):
141
161
cdef Py_UNICODE *c_line
163
cdef Py_UNICODE *accum_value
164
cdef int accum_len, accum_size
147
# TODO: jam 20060922 This code should raise real errors rather than
148
# using 'assert' to process user input, or raising ValueError
149
# rather than a more specific error.
150
for line in unicode_iter:
153
if not PyUnicode_CheckExact(line):
154
raise TypeError("%r is not a unicode string" % line)
155
c_line = PyUnicode_AS_UNICODE(line)
156
c_len = PyUnicode_GET_SIZE(line)
159
if Py_UNICODE_ISLINEBREAK(c_line[0]):
160
break # end of stanza
161
if Py_UNICODE_ISSPACE(c_line[0]): # continues previous value,
162
# strictly speaking this should be \t
164
raise ValueError('invalid continuation line %r' % line)
165
new_value = PyUnicode_FromUnicode(c_line+sizeof(Py_UNICODE),
167
else: # new tag:value line
169
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
170
(tag, new_value) = _split_first_line_unicode(c_line, c_len)
171
if not _valid_tag(tag):
172
raise ValueError("invalid rio tag %r" % (tag,))
174
PyList_Append(accum_value, new_value)
175
if tag is not None: # add last tag-value
176
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
177
return Stanza.from_pairs(pairs)
178
else: # didn't see any content
169
accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
170
if accum_value == NULL:
173
# TODO: jam 20060922 This code should raise real errors rather than
174
# using 'assert' to process user input, or raising ValueError
175
# rather than a more specific error.
176
for line in unicode_iter:
179
if not PyUnicode_CheckExact(line):
180
raise TypeError("%r is not a unicode string" % line)
181
c_line = PyUnicode_AS_UNICODE(line)
182
c_len = PyUnicode_GET_SIZE(line)
185
if Py_UNICODE_ISLINEBREAK(c_line[0]):
186
break # end of stanza
187
if accum_len + c_len > accum_size:
188
accum_size = (accum_size * 3) / 2
189
accum_value = <Py_UNICODE *>realloc(accum_value,
190
accum_size*sizeof(Py_UNICODE))
191
if accum_value == NULL:
193
if Py_UNICODE_ISSPACE(c_line[0]): # continues previous value,
194
# strictly speaking this should be \t
196
raise ValueError('invalid continuation line %r' % line)
197
Py_UNICODE_COPY(accum_value+accum_len*sizeof(Py_UNICODE),
198
c_line+1*sizeof(Py_UNICODE), c_len-1);
199
else: # new tag:value line
201
PyList_Append(pairs, (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
202
tag = _split_first_line_unicode(c_line, c_len, accum_value,
204
if not _valid_tag(tag):
205
raise ValueError("invalid rio tag %r" % (tag,))
206
if tag is not None: # add last tag-value
207
PyList_Append(pairs, (tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
208
return Stanza.from_pairs(pairs)
209
else: # didn't see any content