~bzr-pqm/bzr/bzr.dev : revision 4354.3.10

22

23

cdef extern from "Python.h":

24

ctypedef int Py_ssize_t # Required for older pyrex versions

25

struct _PyObject:

26

pass

27

ctypedef _PyObject PyObject

25

ctypedef int Py_UNICODE

28

26

char *PyString_AS_STRING(object s)

29

27

Py_ssize_t PyString_GET_SIZE(object t) except -1

30

28

object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)

31

Py_ssize_t PyUnicode_GET_SIZE(object t) except -1

32

int PyUnicode_Resize(PyObject **o, Py_ssize_t size) except -1

33

29

object PyString_FromStringAndSize(char *s, Py_ssize_t len)

34

30

int PyString_CheckExact(object)

35

31

int PyUnicode_CheckExact(object)

36

void Py_INCREF(object)

37

void Py_DECREF(object)

38

object PyList_GetItem(object, int)

39

int PyList_SetItem(object, int, object) except -1

40

int PyList_Size(object) except -1

41

32

object PyUnicode_Join(object, object)

42

33

object PyUnicode_AsASCIIString(object)

34

object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)

35

Py_UNICODE *PyUnicode_AS_UNICODE(object)

36

Py_UNICODE *PyUnicode_AsUnicode(object)

37

Py_ssize_t PyUnicode_GET_SIZE(object) except -1

43

38

int PyList_Append(object, object) except -1

44

39

int Py_UNICODE_ISLINEBREAK(Py_UNICODE)

40

int Py_UNICODE_ISSPACE(Py_UNICODE)

41

object PyUnicode_FromUnicode(Py_UNICODE *, int)

45

42

46

43

from bzrlib.rio import Stanza

47

44

65

62

return False

66

63

return True

67

64

68

cdef object _join_unicode_strip(object entries):
    """Concatenate unicode fragments, dropping the very last character.

    The final element of ``entries`` is replaced in place by a copy that
    lacks its last character; the elements are then joined with an empty
    separator and the joined string is returned.
    """
    # TODO: Slicing allocates a second string only to lose one character.
    #       Ideally the joined result would simply be shrunk by one.
    trimmed_tail = entries[-1][:-1]
    entries[-1] = trimmed_tail
    separator = unicode("")
    return PyUnicode_Join(separator, entries)

76

71

77

72

78

cdef object _split_first_line(char *line, int len):

73

cdef object _split_first_line_utf8(char *line, int len):

79

74

cdef int i

80

75

for i from 0 <= i < len:

81

76

if line[i] == c':':

86

81

raise ValueError('tag/value separator not found in line %r' % line)

87

82

88

83

84

# PyUnicode_AsUnicode returns a pointer into the unicode object's own buffer.
# The object is therefore stored in a module-level variable so that the
# buffer stays valid for as long as `colon` is dereferenced, instead of
# depending on the interpreter happening to keep a temporary alive.
cdef object _colon_unicode
_colon_unicode = unicode(":")
cdef Py_UNICODE *colon
colon = PyUnicode_AsUnicode(_colon_unicode)


cdef object _split_first_line_unicode(Py_UNICODE *line, int len):
    """Split a 'tag: value' line held in a Py_UNICODE buffer.

    :param line: pointer to the first character of the line.
    :param len: number of characters available at ``line``.
    :return: a 2-tuple ``(tag, value)``. ``tag`` is the text before the first
        colon, encoded with PyUnicode_EncodeASCII in "strict" mode; ``value``
        is a new unicode object holding everything after the colon and the
        single separator character that follows it.
    :raises ValueError: if the line contains no colon, or if the first colon
        is not followed by a whitespace character. Errors raised by the
        strict ASCII encoding of the tag propagate unchanged.
    """
    cdef int i
    for i from 0 <= i < len:
        if line[i] == colon[0]:
            # Test the index before reading line[i+1], so a colon in the last
            # position is rejected without looking beyond `len` characters.
            # NOTE(review): any whitespace is accepted as the separator here;
            # confirm whether only a plain space should be allowed.
            if i + 1 >= len or not Py_UNICODE_ISSPACE(line[i+1]):
                raise ValueError("invalid tag in line %r" %
                                 PyUnicode_FromUnicode(line, len))
            return (PyUnicode_EncodeASCII(line, i, "strict"),
                    PyUnicode_FromUnicode(line+i+2, len-i-2))
    raise ValueError("tag/value separator not found in line %r" %
                     PyUnicode_FromUnicode(line, len))

98

99

89

100

def _read_stanza_utf8(line_iter):

90

cdef char *c_line, *colon

101

cdef char *c_line

91

102

cdef Py_ssize_t c_len

92

103

pairs = []

93

104

tag = None

100

111

if line is None:

101

112

break # end of file

102

113

if not PyString_CheckExact(line):

103

raise TypeError("%r is not a line" % line)

114

raise TypeError("%r is not a plain string" % line)

104

115

c_line = PyString_AS_STRING(line)

105

116

c_len = PyString_GET_SIZE(line)

106

117

if c_len < 1:

113

124

new_value = PyUnicode_DecodeUTF8(c_line+1, c_len-1, "strict")

114

125

else: # new tag:value line

115

126

if tag is not None:

116

PyList_Append(pairs, (tag, _join_utf8_strip(accum_value)))

127

PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))

117

128

accum_value = []

118

(tag, new_value) = _split_first_line(c_line, c_len)

129

(tag, new_value) = _split_first_line_utf8(c_line, c_len)

119

130

if not _valid_tag(tag):

120

131

raise ValueError("invalid rio tag %r" % (tag,))

121

132

accum_value.append(new_value)

122

133

if tag is not None: # add last tag-value

123

PyList_Append(pairs, (tag, _join_utf8_strip(accum_value)))

134

PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))

124

135

return Stanza.from_pairs(pairs)

125

136

else: # didn't see any content

126

137

return None

127

138

128

139

129

140

def _read_stanza_unicode(unicode_iter):
    """Read one stanza from an iterable of unicode lines.

    Lines are consumed until the iterable is exhausted, until a ``None`` or
    zero-length line is seen (end of file), or until a line whose first
    character is a line break is seen (end of stanza).

    :param unicode_iter: iterable yielding one exact ``unicode`` object per
        line.
    :return: ``Stanza.from_pairs(pairs)`` built from the (tag, value) pairs
        that were read, or None when no tag was seen.
    :raises TypeError: if a line is not exactly a ``unicode`` object.
    :raises ValueError: for a continuation line that appears before any tag,
        for a tag rejected by ``_valid_tag``, or for whatever
        ``_split_first_line_unicode`` raises on a malformed tag line.
    """
    cdef Py_UNICODE *c_line
    cdef int c_len
    pairs = []
    tag = None
    accum_value = []

    # TODO: Malformed input is reported with a plain ValueError; a more
    #       specific error would be better.
    for line in unicode_iter:
        if line is None:
            break       # end of file
        if not PyUnicode_CheckExact(line):
            raise TypeError("%r is not a unicode string" % line)
        c_line = PyUnicode_AS_UNICODE(line)
        c_len = PyUnicode_GET_SIZE(line)
        if c_len < 1:
            break       # end of file
        if Py_UNICODE_ISLINEBREAK(c_line[0]):
            break       # end of stanza
        if c_line[0] == c'\t':      # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % line)
            # Skip the leading tab. Arithmetic on a Py_UNICODE pointer is
            # already counted in characters, so the offset is 1; adding
            # sizeof(Py_UNICODE) would skip several characters and then copy
            # c_len-1 characters from beyond the end of the line.
            new_value = PyUnicode_FromUnicode(c_line + 1, c_len - 1)
        else:           # new tag:value line
            if tag is not None:
                # Flush the value accumulated for the previous tag.
                PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
            (tag, new_value) = _split_first_line_unicode(c_line, c_len)
            if not _valid_tag(tag):
                raise ValueError("invalid rio tag %r" % (tag,))
            accum_value = []
        PyList_Append(accum_value, new_value)
    if tag is not None: # add last tag-value
        PyList_Append(pairs, (tag, _join_unicode_strip(accum_value)))
        return Stanza.from_pairs(pairs)
    else:     # didn't see any content
        return None