~bzr-pqm/bzr/bzr.dev : contents of bzrlib/_rio

~bzr-pqm/bzr/bzr.dev : (revision 4945)

4354.3.1 by Jelmer Vernooij Move core RIO parsing functionality to _rio_py.py.	1	# Copyright (C) 2009 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	16
4354.3.2 by Jelmer Vernooij Provide custom implementation of _read_stanza_utf8 in Pyrex.	17	"""Pyrex implementation of _read_stanza_*."""
	18
# python2.4 support
cdef extern from "python-compat.h":
    pass

# C allocator used for the shared value accumulation buffers.
cdef extern from "stdlib.h":
    void *malloc(int)
    void *realloc(void *, int)
    void free(void *)

# CPython (2.x) string / unicode / list C API used by the parsers below.
cdef extern from "Python.h":
    ctypedef int Py_ssize_t # Required for older pyrex versions
    ctypedef int Py_UNICODE
    char *PyString_AS_STRING(object s)
    Py_ssize_t PyString_GET_SIZE(object t) except -1
    object PyUnicode_DecodeUTF8(char *string, Py_ssize_t length, char *errors)
    object PyString_FromStringAndSize(char *s, Py_ssize_t len)
    int PyString_CheckExact(object)
    int PyUnicode_CheckExact(object)
    object PyUnicode_Join(object, object)
    object PyUnicode_EncodeASCII(Py_UNICODE *, int, char *)
    Py_UNICODE *PyUnicode_AS_UNICODE(object)
    Py_UNICODE *PyUnicode_AsUnicode(object)
    Py_ssize_t PyUnicode_GET_SIZE(object) except -1
    int PyList_Append(object, object) except -1
    int Py_UNICODE_ISLINEBREAK(Py_UNICODE)
    object PyUnicode_FromUnicode(Py_UNICODE *, int)
    void Py_UNICODE_COPY(Py_UNICODE *, Py_UNICODE *, int)

cdef extern from "string.h":
    void *memcpy(void *, void *, int)
	49
4354.3.2 by Jelmer Vernooij Provide custom implementation of _read_stanza_utf8 in Pyrex.	50	from bzrlib.rio import Stanza
	51
cdef int _valid_tag_char(char c): # cannot_raise
    """Return 1 if c may appear in a RIO tag, 0 otherwise.

    Accepted characters are '_', '-', ASCII letters and ASCII digits.
    """
    if c == c'_' or c == c'-':
        return 1
    if c >= c'a' and c <= c'z':
        return 1
    if c >= c'A' and c <= c'Z':
        return 1
    if c >= c'0' and c <= c'9':
        return 1
    return 0
	57
	58
def _valid_tag(tag):
    """Check whether tag is a well-formed RIO tag.

    :param tag: the candidate tag; must be exactly a plain str
    :return: True if tag is non-empty and every byte is accepted by
        _valid_tag_char, False otherwise
    :raises TypeError: if tag is not exactly a plain str
    """
    cdef char *raw
    cdef Py_ssize_t size
    cdef int pos
    if not PyString_CheckExact(tag):
        raise TypeError(tag)
    raw = PyString_AS_STRING(tag)
    size = PyString_GET_SIZE(tag)
    # An empty tag is never valid.
    if size < 1:
        return False
    pos = 0
    while pos < size:
        if not _valid_tag_char(raw[pos]):
            return False
        pos = pos + 1
    return True
4354.3.2 by Jelmer Vernooij Provide custom implementation of _read_stanza_utf8 in Pyrex.	73
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	74
	75	cdef object _split_first_line_utf8(char *line, int len,
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	76	char value, Py_ssize_t value_len):
4354.3.8 by Jelmer Vernooij Review feedback from John:	77	cdef int i
	78	for i from 0 <= i < len:
	79	if line[i] == c':':
	80	if line[i+1] != c' ':
	81	raise ValueError("invalid tag in line %r" % line)
4354.3.14 by Jelmer Vernooij Review feedback from John.	82	memcpy(value, line+i+2, len-i-2)
4354.3.14 by Jelmer Vernooij Review feedback from John.	83	value_len[0] = len-i-2
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	84	return PyString_FromStringAndSize(line, i)
4354.3.8 by Jelmer Vernooij Review feedback from John:	85	raise ValueError('tag/value separator not found in line %r' % line)
4354.3.8 by Jelmer Vernooij Review feedback from John:	86
4354.3.2 by Jelmer Vernooij Provide custom implementation of _read_stanza_utf8 in Pyrex.	87
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	88	cdef object _split_first_line_unicode(Py_UNICODE *line, int len,
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	89	Py_UNICODE value, Py_ssize_t value_len):
4354.3.10 by Jelmer Vernooij Use Py_UNICODE in unicode RIO parser.	90	cdef int i
	91	for i from 0 <= i < len:
4354.3.14 by Jelmer Vernooij Review feedback from John.	92	if line[i] == c':':
4354.3.14 by Jelmer Vernooij Review feedback from John.	93	if line[i+1] != c' ':
4354.3.10 by Jelmer Vernooij Use Py_UNICODE in unicode RIO parser.	94	raise ValueError("invalid tag in line %r" %
	95	PyUnicode_FromUnicode(line, len))
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	96	memcpy(value, &line[i+2], (len-i-2) * sizeof(Py_UNICODE))
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	97	value_len[0] = len-i-2
	98	return PyUnicode_EncodeASCII(line, i, "strict")
4354.3.10 by Jelmer Vernooij Use Py_UNICODE in unicode RIO parser.	99	raise ValueError("tag/value separator not found in line %r" %
	100	PyUnicode_FromUnicode(line, len))
	101
	102
def _read_stanza_utf8(line_iter):
    """Read one stanza from an iterator of UTF-8 encoded lines.

    Reading stops at the first None or empty line (end of file) or at a
    line consisting only of a newline (end of stanza).  Lines starting
    with a tab continue the value of the preceding tag.

    :param line_iter: iterable yielding plain str lines
    :return: Stanza.from_pairs() of the (tag, unicode value) pairs read,
        or None if no tag was seen
    :raises TypeError: if a line is not exactly a plain str
    :raises ValueError: for a continuation line before any tag, a missing
        or malformed tag/value separator, or an invalid tag
    :raises MemoryError: if the accumulation buffer cannot be allocated
    """
    cdef char *c_line
    cdef Py_ssize_t c_len
    cdef char *accum_value, *new_accum_value
    cdef Py_ssize_t accum_len, accum_size, value_len
    pairs = []
    tag = None
    accum_len = 0
    accum_size = 4096
    accum_value = <char *>malloc(accum_size)
    if accum_value == NULL:
        raise MemoryError
    try:
        for line in line_iter:
            if line is None:
                break # end of file
            if not PyString_CheckExact(line):
                raise TypeError("%r is not a plain string" % line)
            c_line = PyString_AS_STRING(line)
            c_len = PyString_GET_SIZE(line)
            if c_len < 1:
                break # end of file
            if c_len == 1 and c_line[0] == c"\n":
                break # end of stanza
            # Grow the shared buffer so this line is guaranteed to fit.
            if accum_len + c_len > accum_size:
                accum_size = (accum_len + c_len)
                new_accum_value = <char *>realloc(accum_value, accum_size)
                if new_accum_value == NULL:
                    # accum_value is still valid; it is freed in finally.
                    raise MemoryError
                else:
                    accum_value = new_accum_value
            if c_line[0] == c'\t': # continues previous value
                if tag is None:
                    raise ValueError('invalid continuation line %r' % line)
                memcpy(accum_value+accum_len, c_line+1, c_len-1)
                accum_len = accum_len + c_len-1
            else: # new tag:value line
                if tag is not None:
                    # Drop the trailing newline; never pass a negative
                    # length when the value is empty and unterminated.
                    value_len = accum_len - 1
                    if value_len < 0:
                        value_len = 0
                    PyList_Append(pairs,
                        (tag, PyUnicode_DecodeUTF8(accum_value, value_len,
                                                   "strict")))
                tag = _split_first_line_utf8(c_line, c_len, accum_value,
                                             &accum_len)
                if not _valid_tag(tag):
                    raise ValueError("invalid rio tag %r" % (tag,))
        if tag is not None: # add last tag-value
            value_len = accum_len - 1
            if value_len < 0:
                value_len = 0
            PyList_Append(pairs,
                (tag, PyUnicode_DecodeUTF8(accum_value, value_len, "strict")))
            return Stanza.from_pairs(pairs)
        else: # didn't see any content
            return None
    finally:
        free(accum_value)
4354.3.2 by Jelmer Vernooij Provide custom implementation of _read_stanza_utf8 in Pyrex.	156
	157
	158	def _read_stanza_unicode(unicode_iter):
4354.3.10 by Jelmer Vernooij Use Py_UNICODE in unicode RIO parser.	159	cdef Py_UNICODE *c_line
	160	cdef int c_len
4354.3.14 by Jelmer Vernooij Review feedback from John.	161	cdef Py_UNICODE accum_value, new_accum_value
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	162	cdef Py_ssize_t accum_len, accum_size
4354.3.3 by Jelmer Vernooij More performance tweaks.	163	pairs = []
4354.3.2 by Jelmer Vernooij Provide custom implementation of _read_stanza_utf8 in Pyrex.	164	tag = None
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	165	accum_len = 0
	166	accum_size = 4096
	167	accum_value = <Py_UNICODE >malloc(accum_sizesizeof(Py_UNICODE))
	168	if accum_value == NULL:
	169	raise MemoryError
	170	try:
	171	for line in unicode_iter:
	172	if line is None:
	173	break # end of file
	174	if not PyUnicode_CheckExact(line):
	175	raise TypeError("%r is not a unicode string" % line)
	176	c_line = PyUnicode_AS_UNICODE(line)
	177	c_len = PyUnicode_GET_SIZE(line)
	178	if c_len < 1:
	179	break # end of file
	180	if Py_UNICODE_ISLINEBREAK(c_line[0]):
	181	break # end of stanza
4354.3.14 by Jelmer Vernooij Review feedback from John.	182	if accum_len + c_len > accum_size:
	183	accum_size = accum_len + c_len
	184	new_accum_value = <Py_UNICODE *>realloc(accum_value,
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	185	accum_size*sizeof(Py_UNICODE))
4354.3.14 by Jelmer Vernooij Review feedback from John.	186	if new_accum_value == NULL:
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	187	raise MemoryError
4354.3.14 by Jelmer Vernooij Review feedback from John.	188	else:
	189	accum_value = new_accum_value
	190	if c_line[0] == c'\t': # continues previous value,
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	191	if tag is None:
	192	raise ValueError('invalid continuation line %r' % line)
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	193	memcpy(&accum_value[accum_len], &c_line[1],
4354.3.14 by Jelmer Vernooij Review feedback from John.	194	(c_len-1)*sizeof(Py_UNICODE))
4354.3.14 by Jelmer Vernooij Review feedback from John.	195	accum_len = accum_len + (c_len-1)
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	196	else: # new tag:value line
	197	if tag is not None:
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	198	PyList_Append(pairs,
	199	(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	200	tag = _split_first_line_unicode(c_line, c_len, accum_value,
	201	&accum_len)
	202	if not _valid_tag(tag):
	203	raise ValueError("invalid rio tag %r" % (tag,))
	204	if tag is not None: # add last tag-value
4354.3.13 by Jelmer Vernooij Add more RIO tests, fix bugs in pyrex implementation.	205	PyList_Append(pairs,
	206	(tag, PyUnicode_FromUnicode(accum_value, accum_len-1)))
4354.3.11 by Jelmer Vernooij Use shared data area when parsing pairs in stanza.	207	return Stanza.from_pairs(pairs)
	208	else: # didn't see any content
	209	return None
	210	finally:
	211	free(accum_value)