23
23
# Declarations of the CPython C-API types, functions and macros (from
# Python.h) that the functions in this module call directly.
# NOTE(review): the bare integer lines interleaved below look like old/new
# line numbers left over from a diff rendering rather than source -- confirm
# and strip them before compiling.
# NOTE(review): PyUnicode_GET_SIZE is declared twice in this block (once with
# a named parameter, once without); presumably one is a leftover from an
# older revision -- confirm and keep only one.
# NOTE(review): PyList_GetItem is declared as returning `object`, but the
# C-API documents its result as a borrowed reference -- confirm reference
# counts at any call site before relying on this declaration.
cdef extern from "Python.h":
24
24
ctypedef int Py_ssize_t # Required for older pyrex versions
27
ctypedef _PyObject PyObject
25
ctypedef int Py_UNICODE
28
26
char *PyString_AS_STRING(object s)
29
27
Py_ssize_t PyString_GET_SIZE(object t) except -1
30
28
object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)
31
Py_ssize_t PyUnicode_GET_SIZE(object t) except -1
32
int PyUnicode_Resize(PyObject **o, Py_ssize_t size) except -1
33
29
object PyString_FromStringAndSize(char *s, Py_ssize_t len)
34
30
int PyString_CheckExact(object)
35
31
int PyUnicode_CheckExact(object)
36
void Py_INCREF(object)
37
void Py_DECREF(object)
38
object PyList_GetItem(object, int)
39
int PyList_SetItem(object, int, object) except -1
40
int PyList_Size(object) except -1
41
32
object PyUnicode_Join(object, object)
42
33
object PyUnicode_AsASCIIString(object)
34
object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
35
Py_UNICODE *PyUnicode_AS_UNICODE(object)
36
Py_UNICODE *PyUnicode_AsUnicode(object)
37
Py_ssize_t PyUnicode_GET_SIZE(object) except -1
43
38
int PyList_Append(object, object) except -1
39
int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
40
int Py_UNICODE_ISSPACE(Py_UNICODE)
41
object PyUnicode_FromUnicode(Py_UNICODE *, int)
46
43
from bzrlib.rio import Stanza
68
# Helper called by the stanza readers to collapse the accumulated fragments
# of one value into a single unicode object.
# NOTE(review): two header lines appear below (_join_utf8_strip and
# _join_unicode_strip) over a single body; this looks like the old and the
# new name of the same function -- confirm which one is current.
cdef object _join_utf8_strip(object entries):
65
cdef object _join_unicode_strip(object entries):
69
66
"""Join a set of unicode strings and strip the last character."""
72
67
# TODO: This creates a new object just without the last character.
73
68
# Ideally, we should just resize it by -1
74
69
# Drop the final character of the last entry by rebinding entries[-1]. This
# modifies the sequence passed in by the caller and requires it to hold at
# least one entry.
entries[-1] = entries[-1][:-1]
75
70
# Concatenate every entry, using an empty unicode string as the separator.
return PyUnicode_Join(unicode(""), entries)
78
# Scan the first `len` bytes of `line` for a ':' byte; if the scan finishes
# without the visible raise being bypassed, a ValueError reporting the
# missing tag/value separator is raised.
# NOTE(review): two header lines appear below (_split_first_line and
# _split_first_line_utf8), and the statements that handle a found ':' are not
# visible in this view -- the comments here cover only what is shown.
cdef object _split_first_line(char *line, int len):
73
cdef object _split_first_line_utf8(char *line, int len):
80
75
# Walk the buffer one byte at a time, index i running over [0, len).
for i from 0 <= i < len:
81
76
# Candidate separator: a ':' byte at position i.
if line[i] == c':':
86
81
# Reached when no usable separator was found in the scanned bytes.
raise ValueError('tag/value separator not found in line %r' % line)
84
# Pointer to the Py_UNICODE data of u":"; _split_first_line_unicode compares
# each character against colon[0] to find the tag/value separator.
# PyUnicode_AsUnicode returns a pointer into the unicode object's own buffer,
# so the object is bound to a module-level name to keep that buffer alive for
# as long as the pointer is used (previously the pointer was taken from a
# temporary that nothing referenced afterwards).
cdef object _colon_unicode
cdef Py_UNICODE *colon
_colon_unicode = unicode(":")
colon = PyUnicode_AsUnicode(_colon_unicode)
87
# Split a Py_UNICODE buffer of `len` characters at the first colon.
# Returns a 2-tuple: the characters before the colon encoded to ASCII with
# "strict" error handling, and a unicode object built from the characters
# starting two positions after the colon.
# Raises ValueError when the character following the colon is not whitespace,
# or when no colon is found within `len` characters.
# NOTE(review): the declaration of the loop index `i` is not visible in this
# view -- confirm it is declared as a C integer.
cdef object _split_first_line_unicode(Py_UNICODE *line, int len):
89
for i from 0 <= i < len:
90
# colon[0] is the module-level Py_UNICODE value for ':'.
if line[i] == colon[0]:
91
# NOTE(review): line[i+1] is read without checking i+1 < len; when the colon
# is the last of the `len` characters this reads one element past them, and
# the value slice below would then be built with length len-i-2 == -1 --
# confirm callers always pass a buffer with a readable element there.
if not Py_UNICODE_ISSPACE(line[i+1]):
92
raise ValueError("invalid tag in line %r" %
93
PyUnicode_FromUnicode(line, len))
94
# Tag: the i characters before the colon. Value: everything after the colon
# and the single whitespace character that follows it.
return (PyUnicode_EncodeASCII(line, i, "strict"),
95
PyUnicode_FromUnicode(line+i+2, len-i-2))
96
# The loop ended without finding a colon.
raise ValueError("tag/value separator not found in line %r" %
97
PyUnicode_FromUnicode(line, len))
89
100
# Read tag/value pairs from `line_iter` and return them as a Stanza built
# with Stanza.from_pairs.
# NOTE(review): only part of this function is visible here (the loop header,
# the initialisation of pairs/tag/accum_value and the body of the final
# `else` are missing), and several statements appear twice in what look like
# an old and a new revision. The comments below describe the visible
# statements only.
def _read_stanza_utf8(line_iter):
90
cdef char *c_line, *colon
91
102
cdef Py_ssize_t c_len
101
112
break # end of file
102
113
# Only exact `str` instances are accepted; anything else is a TypeError.
if not PyString_CheckExact(line):
103
raise TypeError("%r is not a line" % line)
114
raise TypeError("%r is not a plain string" % line)
104
115
# Take the raw byte buffer and its length so the line can be examined at C
# level.
c_line = PyString_AS_STRING(line)
105
116
c_len = PyString_GET_SIZE(line)
113
124
# Decode everything after the first byte as strict UTF-8 -- presumably the
# continuation-line case, skipping its leading marker byte; confirm against
# the lines not shown here.
new_value = PyUnicode_DecodeUTF8(c_line+1, c_len-1, "strict")
114
125
else: # new tag:value line
115
126
# A new tag begins: first store the previous tag with its joined value.
if tag is not None:
116
PyList_Append(pairs, (tag, _join_utf8_strip(accum_value)))
127
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
118
(tag, new_value) = _split_first_line(c_line, c_len)
129
(tag, new_value) = _split_first_line_utf8(c_line, c_len)
119
130
# _valid_tag is defined outside this view; a falsy result rejects the tag.
if not _valid_tag(tag):
120
131
raise ValueError("invalid rio tag %r" % (tag,))
121
132
accum_value.append(new_value)
122
133
# After the loop: store the pair that was still being accumulated, if any.
if tag is not None: # add last tag-value
123
PyList_Append(pairs, (tag, _join_utf8_strip(accum_value)))
134
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
124
135
return Stanza.from_pairs(pairs)
125
136
else: # didn't see any content
129
140
def _read_stanza_unicode(unicode_iter):
141
cdef Py_UNICODE *c_line
136
148
# using 'assert' to process user input, or raising ValueError
137
149
# rather than a more specific error.
138
150
for line in unicode_iter:
139
if line is None or line == unicode(''):
140
152
break # end of file
141
if line == unicode('\n'):
153
if not PyUnicode_CheckExact(line):
154
raise TypeError("%r is not a unicode string" % line)
155
c_line = PyUnicode_AS_UNICODE(line)
156
c_len = PyUnicode_GET_SIZE(line)
159
if Py_UNICODE_ISLINEBREAK(c_line[0]):
142
160
break # end of stanza
143
if line[0] == unicode('\t'): # continues previous value
161
if Py_UNICODE_ISSPACE(c_line[0]): # continues previous value,
162
# strictly speaking this should be \t
145
164
raise ValueError('invalid continuation line %r' % line)
146
PyList_Append(accum_value, line[1:])
165
new_value = PyUnicode_FromUnicode(c_line+sizeof(Py_UNICODE),
147
167
else: # new tag:value line
148
168
if tag is not None:
149
PyList_Append(pairs, (tag, _join_utf8_strip(accum_value)))
151
colon_index = line.index(unicode(': '))
153
raise ValueError('tag/value separator not found in line %r'
155
tag = PyUnicode_AsASCIIString(line[0:colon_index])
169
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
170
(tag, new_value) = _split_first_line_unicode(c_line, c_len)
156
171
if not _valid_tag(tag):
157
172
raise ValueError("invalid rio tag %r" % (tag,))
158
accum_value = [line[colon_index+2:]]
174
PyList_Append(accum_value, new_value)
160
175
if tag is not None: # add last tag-value
161
PyList_Append(pairs, (tag, _join_utf8_strip(accum_value)))
176
PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
162
177
return Stanza.from_pairs(pairs)
163
178
else: # didn't see any content