# Source: ~bzr-pqm/bzr/bzr.dev : revision 4368
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Pyrex implementation of _read_stanza_*."""

18

19

# Python 2.4 support.
# The header is pulled in purely for its C-level effect: no names from it are
# declared to Pyrex here (the extern block body is just ``pass``).
# NOTE(review): presumably it supplies definitions such as Py_ssize_t that the
# Python 2.4 headers lack -- confirm against python-compat.h.
cdef extern from "python-compat.h":
    pass

22

23

# C heap allocation primitives used for the value accumulator buffers.
#
# These are declared from the standard <stdlib.h> rather than <malloc.h>:
# <malloc.h> is a non-standard header that is absent (or deliberately an
# error) on several platforms, while ISO C guarantees that malloc, realloc
# and free are declared by <stdlib.h>.
#
# The size arguments stay declared as ``int`` so that the declarations keep
# compiling with old Pyrex releases; the generated C still calls the real
# size_t-based prototypes from the header.
cdef extern from "stdlib.h":
    void *malloc(int)
    void *realloc(void *, int)
    void free(void *)

27

28

# Declarations of the CPython C-API functions and macros used by this module.
#
# The two ctypedefs only tell Pyrex that the names are integer-like types;
# the generated C uses the real definitions from Python.h.
#
# Every length/count parameter is declared as Py_ssize_t to match the
# C-API prototypes.  Declaring them as ``int`` makes Pyrex coerce the
# Py_ssize_t values the callers pass down to a C int first, which would
# truncate lengths on platforms where Py_ssize_t is wider than int.
cdef extern from "Python.h":
    ctypedef int Py_ssize_t # Required for older pyrex versions
    ctypedef int Py_UNICODE

    # Byte-string access and construction.
    char *PyString_AS_STRING(object s)
    Py_ssize_t PyString_GET_SIZE(object t) except -1
    object PyString_FromStringAndSize(char *s, Py_ssize_t len)
    int PyString_CheckExact(object)

    # Unicode access, construction and conversion.
    object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)
    int PyUnicode_CheckExact(object)
    object PyUnicode_Join(object, object)
    object PyUnicode_EncodeASCII(Py_UNICODE *, Py_ssize_t, char *)
    Py_UNICODE *PyUnicode_AS_UNICODE(object)
    Py_UNICODE *PyUnicode_AsUnicode(object)
    Py_ssize_t PyUnicode_GET_SIZE(object) except -1
    int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
    object PyUnicode_FromUnicode(Py_UNICODE *, Py_ssize_t)
    void *Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, Py_ssize_t)

    # List manipulation.
    int PyList_Append(object, object) except -1

46

47

# C memory-copy primitive; this module uses it to copy line contents into the
# value accumulator buffers.
cdef extern from "string.h":
    # NOTE(review): the byte count is declared as ``int`` here while the C
    # prototype takes size_t -- presumably for old-Pyrex compatibility; confirm.
    void *memcpy(void *, void *, int)

49

50

from bzrlib.rio import Stanza

51

52

cdef int _valid_tag_char(char c):
    # Return 1 when ``c`` is permitted inside a rio tag, 0 otherwise.
    # Permitted characters are ASCII letters, ASCII digits, '_' and '-'.
    if c >= c'a' and c <= c'z':
        return 1
    if c >= c'A' and c <= c'Z':
        return 1
    if c >= c'0' and c <= c'9':
        return 1
    if c == c'_' or c == c'-':
        return 1
    return 0

57

58

59

def _valid_tag(tag):
    """Tell whether ``tag`` is an acceptable rio tag name.

    :param tag: An exact ``str`` instance (subclasses are rejected).
    :return: True if ``tag`` is non-empty and every byte is accepted by
        ``_valid_tag_char``; False otherwise.
    :raises TypeError: if ``tag`` is not an exact ``str``.
    """
    cdef char *tag_chars
    cdef Py_ssize_t tag_size
    cdef int pos
    if not PyString_CheckExact(tag):
        raise TypeError(tag)
    tag_chars = PyString_AS_STRING(tag)
    tag_size = PyString_GET_SIZE(tag)
    # An empty tag is never valid.
    if tag_size < 1:
        return False
    # Scan the bytes and bail out on the first one that is not allowed.
    pos = 0
    while pos < tag_size:
        if not _valid_tag_char(tag_chars[pos]):
            return False
        pos = pos + 1
    return True

73

74

75

cdef object _split_first_line_utf8(char *line, int len,
                                   char *value, Py_ssize_t *value_len):
    # Split a "tag: value" line at its first ':'.
    #
    # line/len:  the raw bytes of the line and their count.
    # value:     output buffer; receives everything after the ": " separator
    #            (the caller must provide room for at least ``len`` bytes).
    # value_len: output; set to the number of bytes written to ``value``.
    #
    # Returns the tag (the bytes before the ':') as a Python str.
    # Raises ValueError if no ':' is found, or if the first ':' is not
    # immediately followed by a space.
    #
    # The offending line is rendered with an explicit length in the error
    # messages so that a line containing a NUL byte is reported in full
    # instead of being cut off at the NUL by the implicit char* conversion.
    cdef int i
    for i from 0 <= i < len:
        if line[i] == c':':
            # Check the bound explicitly rather than relying on the buffer
            # being NUL-terminated when ':' is the last byte of the line.
            if i + 1 >= len or line[i+1] != c' ':
                raise ValueError("invalid tag in line %r"
                                 % (PyString_FromStringAndSize(line, len),))
            memcpy(value, line+i+2, len-i-2)
            value_len[0] = len-i-2
            return PyString_FromStringAndSize(line, i)
    raise ValueError('tag/value separator not found in line %r'
                     % (PyString_FromStringAndSize(line, len),))

86

87

88

cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
                                      Py_UNICODE *value, Py_ssize_t *value_len):
    # Split a unicode "tag: value" line at its first ':'.
    #
    # Everything after the ": " separator is copied into ``value`` and the
    # number of code units copied is stored in ``value_len[0]``.  The part
    # before the ':' is returned as an ASCII-encoded str.
    # Raises ValueError when there is no ':' or when the first ':' is not
    # followed by a space.
    cdef int pos
    cdef int rest
    pos = 0
    while pos < len:
        if line[pos] != c':':
            pos = pos + 1
            continue
        # Found the first ':'; it has to be followed by a single space.
        if line[pos+1] != c' ':
            raise ValueError("invalid tag in line %r" %
                             PyUnicode_FromUnicode(line, len))
        rest = len - pos - 2
        memcpy(value, &line[pos+2], rest * sizeof(Py_UNICODE))
        value_len[0] = rest
        return PyUnicode_EncodeASCII(line, pos, "strict")
    raise ValueError("tag/value separator not found in line %r" %
                     PyUnicode_FromUnicode(line, len))

101

102

103

def _read_stanza_utf8(line_iter):
    """Read one rio stanza from an iterable of UTF-8 encoded lines.

    Lines are consumed until a ``None``, an empty string or a lone newline
    is seen, or the iterable is exhausted.  A line starting with a tab
    continues the value of the preceding tag; any other line must have the
    form ``tag: value``.

    :param line_iter: Iterable yielding exact ``str`` objects (or ``None``
        to signal end of file).
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, unicode value)
        pairs read, or ``None`` if no tag line was seen.
    :raises TypeError: if an item is neither ``None`` nor an exact ``str``.
    :raises ValueError: on a continuation line with no preceding tag, a
        malformed ``tag: value`` line, or an invalid tag; also if a value
        is not valid UTF-8 (decoding uses the "strict" error handler).
    :raises MemoryError: if the accumulator buffer cannot be (re)allocated.
    """
    cdef char *c_line
    cdef Py_ssize_t c_len
    cdef char *accum_value, *new_accum_value
    cdef Py_ssize_t accum_len, accum_size
    cdef Py_ssize_t value_len
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <char *>malloc(accum_size)
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in line_iter:
            if line is None:
                break # end of file
            if not PyString_CheckExact(line):
                # Wrap in a tuple so that a tuple-valued ``line`` is shown
                # as-is rather than being unpacked as format arguments.
                raise TypeError("%r is not a plain string" % (line,))
            c_line = PyString_AS_STRING(line)
            c_len = PyString_GET_SIZE(line)
            if c_len < 1:
                break # end of file
            if c_len == 1 and c_line[0] == c"\n":
                break # end of stanza
            # Make sure the accumulator can hold this whole line on top of
            # what is already stored.
            if accum_len + c_len > accum_size:
                accum_size = (accum_len + c_len)
                new_accum_value = <char *>realloc(accum_value, accum_size)
                if new_accum_value == NULL:
                    # accum_value is still valid and is freed in ``finally``.
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Append everything after the tab; the newline kept at the
                # end of the previous chunk separates the two lines.
                memcpy(accum_value+accum_len, c_line+1, c_len-1)
                accum_len = accum_len + c_len-1
            else: # new tag:value line
                if tag is not None:
                    # Drop the final character (the line terminator), but
                    # never pass a negative length to the decoder: the
                    # accumulator is empty for an empty value on a line
                    # that had no trailing newline.
                    value_len = accum_len - 1
                    if value_len < 0:
                        value_len = 0
                    PyList_Append(pairs,
                        (tag, PyUnicode_DecodeUTF8(accum_value, value_len,
                                                   "strict")))
                tag = _split_first_line_utf8(c_line, c_len, accum_value,
                                             &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            value_len = accum_len - 1
            if value_len < 0:
                value_len = 0
            PyList_Append(pairs,
                (tag, PyUnicode_DecodeUTF8(accum_value, value_len, "strict")))
            return Stanza.from_pairs(pairs)
        else: # didn't see any content
            return None
    finally:
        free(accum_value)

156

157

158

def _read_stanza_unicode(unicode_iter):
    """Read one rio stanza from an iterable of unicode lines.

    Lines are consumed until a ``None``, an empty string or a line whose
    first character is a line break is seen, or the iterable is exhausted.
    A line starting with a tab continues the value of the preceding tag;
    any other line must have the form ``tag: value``.

    :param unicode_iter: Iterable yielding exact ``unicode`` objects (or
        ``None`` to signal end of file).
    :return: ``Stanza.from_pairs(pairs)`` for the (tag, unicode value)
        pairs read, or ``None`` if no tag line was seen.
    :raises TypeError: if an item is neither ``None`` nor an exact
        ``unicode``.
    :raises ValueError: on a continuation line with no preceding tag, a
        malformed ``tag: value`` line, or an invalid tag.
    :raises MemoryError: if the accumulator buffer cannot be (re)allocated.
    """
    cdef Py_UNICODE *c_line
    cdef int c_len
    cdef Py_UNICODE *accum_value, *new_accum_value
    cdef Py_ssize_t accum_len, accum_size
    cdef Py_ssize_t value_len
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <Py_UNICODE *>malloc(accum_size*sizeof(Py_UNICODE))
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in unicode_iter:
            if line is None:
                break       # end of file
            if not PyUnicode_CheckExact(line):
                # Wrap in a tuple so that a tuple-valued ``line`` is shown
                # as-is rather than being unpacked as format arguments.
                raise TypeError("%r is not a unicode string" % (line,))
            c_line = PyUnicode_AS_UNICODE(line)
            c_len = PyUnicode_GET_SIZE(line)
            if c_len < 1:
                break       # end of file
            if Py_UNICODE_ISLINEBREAK(c_line[0]):
                break       # end of stanza
            # Make sure the accumulator can hold this whole line on top of
            # what is already stored (sizes are counted in code units).
            if accum_len + c_len > accum_size:
                accum_size = accum_len + c_len
                new_accum_value = <Py_UNICODE *>realloc(accum_value,
                    accum_size*sizeof(Py_UNICODE))
                if new_accum_value == NULL:
                    # accum_value is still valid and is freed in ``finally``.
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value,
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                # Append everything after the tab; the newline kept at the
                # end of the previous chunk separates the two lines.
                memcpy(&accum_value[accum_len], &c_line[1],
                    (c_len-1)*sizeof(Py_UNICODE))
                accum_len = accum_len + (c_len-1)
            else: # new tag:value line
                if tag is not None:
                    # Drop the final character (the line terminator), but
                    # never pass a negative length: the accumulator is
                    # empty for an empty value on a line that had no
                    # trailing newline.
                    value_len = accum_len - 1
                    if value_len < 0:
                        value_len = 0
                    PyList_Append(pairs,
                        (tag, PyUnicode_FromUnicode(accum_value, value_len)))
                tag = _split_first_line_unicode(c_line, c_len, accum_value,
                                                &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            value_len = accum_len - 1
            if value_len < 0:
                value_len = 0
            PyList_Append(pairs,
                    (tag, PyUnicode_FromUnicode(accum_value, value_len)))
            return Stanza.from_pairs(pairs)
        else: # didn't see any content
            return None
    finally:
        free(accum_value)