~bzr-pqm/bzr/bzr.dev : contents of bzrlib/rio.py at revision 4296

~bzr-pqm/bzr/bzr.dev : (revision 4296)

2052.3.2 by John Arbash Meinel Change Copyright .. by Canonical to Copyright ... Canonical	1	# Copyright (C) 2005 Canonical Ltd
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	2	#
2052.3.1 by John Arbash Meinel Add tests to cleanup the copyright of all source files	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1553.5.6 by Martin Pool Clean up comments	16
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	17	# \subsection{\emph{rio} - simple text metaformat}
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	18	#
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	19	# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	20	#
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	21	# The stored data consists of a series of \emph{stanzas}, each of which contains
	22	# \emph{fields} identified by an ascii name, with Unicode or string contents.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	23	# The field tag is constrained to alphanumeric characters.
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	24	# There may be more than one field in a stanza with the same name.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	25	#
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	26	# The format itself does not deal with character encoding issues, though
	27	# the result will normally be written in Unicode.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	28	#
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	29	# The format is intended to be simple enough that there is exactly one character
	30	# stream representation of an object and vice versa, and that this relation
	31	# will continue to hold for future versions of bzr.
	32
	33	import re
	34
1534.10.2 by Aaron Bentley Implemented rio_file to produce a light file object from stanzas	35	from bzrlib.iterablefile import IterableFile
	36
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	37	# XXX: there is some redundancy in allowing stanzas to be written in
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	38	# isolation as well as through a writer object.
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	39
	40	class RioWriter(object):
	41	def __init__(self, to_file):
	42	self._soft_nl = False
	43	self._to_file = to_file
	44
	45	def write_stanza(self, stanza):
	46	if self._soft_nl:
2911.6.1 by Blake Winton Change 'print >> f,'s to 'f.write('s.	47	self._to_file.write('\n')
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	48	stanza.write(self._to_file)
	49	self._soft_nl = True
	50
	51
	52	class RioReader(object):
	53	"""Read stanzas from a file as a sequence
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	54
	55	to_file can be anything that can be enumerated as a sequence of
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	56	lines (with newlines.)
	57	"""
	58	def __init__(self, from_file):
	59	self._from_file = from_file
	60
	61	def __iter__(self):
	62	while True:
	63	s = read_stanza(self._from_file)
	64	if s is None:
	65	break
	66	else:
	67	yield s
	68
1534.10.2 by Aaron Bentley Implemented rio_file to produce a light file object from stanzas	69
	70	def rio_file(stanzas, header=None):
	71	"""Produce a rio IterableFile from an iterable of stanzas"""
	72	def str_iter():
	73	if header is not None:
	74	yield header + '\n'
	75	first_stanza = True
	76	for s in stanzas:
	77	if first_stanza is not True:
	78	yield '\n'
	79	for line in s.to_lines():
	80	yield line
	81	first_stanza = False
	82	return IterableFile(str_iter())
	83
	84
def read_stanzas(from_file):
    """Yield every stanza read from from_file.

    read_stanza is called on from_file repeatedly until it returns None,
    so from_file must keep its position between calls (as a file object or
    an iterator does).
    """
    stanza = read_stanza(from_file)
    while stanza is not None:
        yield stanza
        stanza = read_stanza(from_file)
	92
	93	class Stanza(object):
	94	"""One stanza for rio.
	95
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	96	Each stanza contains a set of named fields.
	97
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	98	Names must be non-empty ascii alphanumeric plus _. Names can be repeated
	99	within a stanza. Names are case-sensitive. The ordering of fields is
	100	preserved.
	101
	102	Each field value must be either an int or a string.
	103	"""
	104
	105	__slots__ = ['items']
	106
	107	def __init__(self, **kwargs):
	108	"""Construct a new Stanza.
	109
	110	The keyword arguments, if any, are added in sorted order to the stanza.
	111	"""
	112	self.items = []
	113	if kwargs:
	114	for tag, value in sorted(kwargs.items()):
	115	self.add(tag, value)
	116
	117	def add(self, tag, value):
	118	"""Append a name and value to the stanza."""
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	119	if not valid_tag(tag):
	120	raise ValueError("invalid tag %r" % (tag,))
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	121	if isinstance(value, str):
	122	value = unicode(value)
	123	elif isinstance(value, unicode):
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	124	pass
	125	## elif isinstance(value, (int, long)):
	126	## value = str(value) # XXX: python2.4 without L-suffix
	127	else:
1553.5.7 by Martin Pool rio.Stanza.add should raise TypeError on invalid types.	128	raise TypeError("invalid type for rio value: %r of type %s"
	129	% (value, type(value)))
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	130	self.items.append((tag, value))
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	131
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	132	def __contains__(self, find_tag):
	133	"""True if there is any field in this stanza with the given tag."""
	134	for tag, value in self.items:
	135	if tag == find_tag:
	136	return True
	137	return False
	138
	139	def __len__(self):
	140	"""Return number of pairs in the stanza."""
	141	return len(self.items)
	142
	143	def __eq__(self, other):
	144	if not isinstance(other, Stanza):
	145	return False
	146	return self.items == other.items
	147
	148	def __ne__(self, other):
	149	return not self.__eq__(other)
	150
	151	def __repr__(self):
	152	return "Stanza(%r)" % self.items
	153
	154	def iter_pairs(self):
	155	"""Return iterator of tag, value pairs."""
	156	return iter(self.items)
	157
	158	def to_lines(self):
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	159	"""Generate sequence of lines for external version of this file.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	160
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	161	The lines are always utf-8 encoded strings.
	162	"""
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	163	if not self.items:
	164	# max() complains if sequence is empty
	165	return []
	166	result = []
	167	for tag, value in self.items:
	168	if value == '':
	169	result.append(tag + ': \n')
	170	elif '\n' in value:
1185.47.2 by Martin Pool Finish rio format and tests.	171	# don't want splitlines behaviour on empty lines
1185.47.2 by Martin Pool Finish rio format and tests.	172	val_lines = value.split('\n')
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	173	result.append(tag + ': ' + val_lines[0].encode('utf-8') + '\n')
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	174	for line in val_lines[1:]:
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	175	result.append('\t' + line.encode('utf-8') + '\n')
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	176	else:
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	177	result.append(tag + ': ' + value.encode('utf-8') + '\n')
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	178	return result
	179
	180	def to_string(self):
	181	"""Return stanza as a single string"""
	182	return ''.join(self.to_lines())
	183
2030.1.1 by John Arbash Meinel Make it easier to nest Stanzas with Unicode contents	184	def to_unicode(self):
	185	"""Return stanza as a single Unicode string.
	186
	187	This is most useful when adding a Stanza to a parent Stanza
	188	"""
	189	if not self.items:
	190	return u''
	191
	192	result = []
	193	for tag, value in self.items:
	194	if value == '':
	195	result.append(tag + ': \n')
	196	elif '\n' in value:
	197	# don't want splitlines behaviour on empty lines
	198	val_lines = value.split('\n')
	199	result.append(tag + ': ' + val_lines[0] + '\n')
	200	for line in val_lines[1:]:
	201	result.append('\t' + line + '\n')
	202	else:
	203	result.append(tag + ': ' + value + '\n')
	204	return u''.join(result)
	205
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	206	def write(self, to_file):
	207	"""Write stanza to a file"""
	208	to_file.writelines(self.to_lines())
	209
	210	def get(self, tag):
	211	"""Return the value for a field wih given tag.
	212
	213	If there is more than one value, only the first is returned. If the
	214	tag is not present, KeyError is raised.
	215	"""
	216	for t, v in self.items:
	217	if t == tag:
	218	return v
	219	else:
	220	raise KeyError(tag)
	221
	222	__getitem__ = get
	223
	224	def get_all(self, tag):
	225	r = []
	226	for t, v in self.items:
	227	if t == tag:
	228	r.append(v)
	229	return r
1553.5.8 by Martin Pool New Rio.as_dict method	230
	231	def as_dict(self):
	232	"""Return a dict containing the unique values of the stanza.
	233	"""
	234	d = {}
	235	for tag, value in self.items:
	236	d[tag] = value
	237	return d
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	238
# \Z anchors at the very end of the string.  '$' would also match just
# before a trailing newline and so let a tag such as 'foo\n' through.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is a legal rio field name.

    A legal tag is a non-empty string made up only of ascii letters,
    digits, '-' and '_'.

    :param tag: The candidate tag string.
    :return: True or False.
    """
    return bool(_tag_re.match(tag))
	242
	243
	244	def read_stanza(line_iter):
	245	"""Return new Stanza read from list of lines or a file
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	246
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	247	Returns one Stanza that was read, or returns None at end of file. If a
	248	blank line follows the stanza, it is consumed. It's not an error for
	249	there to be no blank at end of file. If there is a blank file at the
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	250	start of the input this is really an empty stanza and that is returned.
1185.47.2 by Martin Pool Finish rio format and tests.	251
	252	Only the stanza lines and the trailing blank (if any) are consumed
	253	from the line_iter.
1553.5.32 by Martin Pool rio files are always externalized in utf-8. test this.	254
	255	The raw lines must be in utf-8 encoding.
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	256	"""
2030.1.5 by John Arbash Meinel Create a 'read_stanza_unicode' to handle unicode processing	257	unicode_iter = (line.decode('utf-8') for line in line_iter)
	258	return read_stanza_unicode(unicode_iter)
	259
	260
	261	def read_stanza_unicode(unicode_iter):
	262	"""Read a Stanza from a list of lines or a file.
	263
	264	The lines should already be in unicode form. This returns a single
	265	stanza that was read. If there is a blank line at the end of the Stanza,
	266	it is consumed. It is not an error for there to be no blank line at
	267	the end of the iterable. If there is a blank line at the beginning,
	268	this is treated as an empty Stanza and None is returned.
	269
	270	Only the stanza lines and the trailing blank (if any) are consumed
	271	from the unicode_iter
	272
	273	:param unicode_iter: A iterable, yeilding Unicode strings. See read_stanza
	274	if you have a utf-8 encoded string.
	275	:return: A Stanza object if there are any lines in the file.
	276	None otherwise
	277	"""
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	278	stanza = Stanza()
1185.47.2 by Martin Pool Finish rio format and tests.	279	tag = None
1185.47.2 by Martin Pool Finish rio format and tests.	280	accum_value = None
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	281
2030.1.5 by John Arbash Meinel Create a 'read_stanza_unicode' to handle unicode processing	282	# TODO: jam 20060922 This code should raise real errors rather than
	283	# using 'assert' to process user input, or raising ValueError
	284	# rather than a more specific error.
	285
	286	for line in unicode_iter:
1963.2.6 by Robey Pointer pychecker is on crack; go back to using 'is None'.	287	if line is None or line == '':
1185.47.1 by Martin Pool [broken] start converting basic_io to more rfc822-like format	288	break # end of file
	289	if line == '\n':
	290	break # end of stanza
	291	real_l = line
1185.47.2 by Martin Pool Finish rio format and tests.	292	if line[0] == '\t': # continues previous value
	293	if tag is None:
	294	raise ValueError('invalid continuation line %r' % real_l)
	295	accum_value += '\n' + line[1:-1]
	296	else: # new tag:value line
	297	if tag is not None:
	298	stanza.add(tag, accum_value)
	299	try:
	300	colon_index = line.index(': ')
	301	except ValueError:
2030.1.5 by John Arbash Meinel Create a 'read_stanza_unicode' to handle unicode processing	302	raise ValueError('tag/value separator not found in line %r'
	303	% real_l)
1534.10.2 by Aaron Bentley Implemented rio_file to produce a light file object from stanzas	304	tag = str(line[:colon_index])
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	305	if not valid_tag(tag):
	306	raise ValueError("invalid rio tag %r" % (tag,))
1185.47.2 by Martin Pool Finish rio format and tests.	307	accum_value = line[colon_index+2:-1]
2030.1.5 by John Arbash Meinel Create a 'read_stanza_unicode' to handle unicode processing	308
1185.47.2 by Martin Pool Finish rio format and tests.	309	if tag is not None: # add last tag-value
	310	stanza.add(tag, accum_value)
	311	return stanza
	312	else: # didn't see any content
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	313	return None
1551.12.1 by Aaron Bentley Basic RIO patch-compatible format is working	314
1551.12.38 by Aaron Bentley Add docs for MergeDirective and RIO-patch functions	315
def to_patch_lines(stanza, max_width=72):
    """Convert a stanza into RIO-Patch format lines.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the
    removal of trailing whitespace, yet is also reasonably easy to read.

    Every output line starts with '# '.  Backslashes are doubled and
    carriage returns are written as a backslash followed by 'r'.  A logical
    line that is too long is split over several physical lines: each
    unfinished physical line ends with a backslash, and each continuation
    is indented by two spaces, which is what _patch_stanza_iter strips
    again when reading.

    :param stanza: Object whose to_lines() returns newline-terminated lines.
    :param max_width: The maximum number of characters per physical line.
        Must be greater than 6.
    :return: a list of lines
    :raises ValueError: if max_width is 6 or less.
    """
    if max_width <= 6:
        raise ValueError(max_width)
    max_rio_width = max_width - 4
    lines = []
    for pline in stanza.to_lines():
        for line in pline.split('\n')[:-1]:
            line = re.sub('\\\\', '\\\\\\\\', line)
            while len(line) > 0:
                partline = line[:max_rio_width]
                line = line[max_rio_width:]
                # Historically this test also compared line[0] with the
                # list [' '], which is never equal to a character, so the
                # search for a nicer break point has always run whenever
                # text remains.  That is kept so the output is unchanged.
                if len(line) > 0:
                    # Prefer to break at a space, then just after a '-',
                    # then at a '/', looking only at the last 20 characters.
                    # Indexes below 3 are ignored so a line always keeps
                    # some real text after its two-space indent.
                    break_index = partline.rfind(' ', -20)
                    if break_index < 3:
                        break_index = partline.rfind('-', -20)
                        break_index += 1
                    if break_index < 3:
                        break_index = partline.rfind('/', -20)
                    if break_index >= 3:
                        line = partline[break_index:] + line
                        partline = partline[:break_index]
                if len(line) > 0:
                    # Two spaces: the reader drops exactly two characters
                    # from the front of every continuation line.
                    line = '  ' + line
                partline = re.sub('\r', '\\\\r', partline)
                blank_line = False
                if len(line) > 0:
                    partline += '\\'
                elif re.search(' $', partline):
                    # Protect trailing whitespace with a continuation
                    # marker followed by an empty continuation line.
                    partline += '\\'
                    blank_line = True
                lines.append('# ' + partline + '\n')
                if blank_line:
                    lines.append('# \n')
    return lines
	360
1551.12.23 by Aaron Bentley Indent line continuations in Patch-RIO, to improve readability	361
1551.12.1 by Aaron Bentley Basic RIO patch-compatible format is working	362	def _patch_stanza_iter(line_iter):
	363	map = {'\\\\': '\\',
1551.12.9 by Aaron Bentley force patch-rio to 76 characters	364	'\\r' : '\r',
1551.12.9 by Aaron Bentley force patch-rio to 76 characters	365	'\\\n': ''}
1551.12.1 by Aaron Bentley Basic RIO patch-compatible format is working	366	def mapget(match):
	367	return map[match.group(0)]
	368
1551.12.9 by Aaron Bentley force patch-rio to 76 characters	369	last_line = None
1551.12.1 by Aaron Bentley Basic RIO patch-compatible format is working	370	for line in line_iter:
1551.12.22 by Aaron Bentley Fix handling of whitespace-stripping without newline munging	371	if line.startswith('# '):
	372	line = line[2:]
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	373	elif line.startswith('#'):
1551.12.22 by Aaron Bentley Fix handling of whitespace-stripping without newline munging	374	line = line[1:]
3376.2.4 by Martin Pool Remove every assert statement from bzrlib!	375	else:
	376	raise ValueError("bad line %r" % (line,))
1551.12.23 by Aaron Bentley Indent line continuations in Patch-RIO, to improve readability	377	if last_line is not None and len(line) > 2:
	378	line = line[2:]
1551.12.1 by Aaron Bentley Basic RIO patch-compatible format is working	379	line = re.sub('\r', '', line)
1551.12.9 by Aaron Bentley force patch-rio to 76 characters	380	line = re.sub('\\\\(.\|\n)', mapget, line)
	381	if last_line is None:
	382	last_line = line
	383	else:
	384	last_line += line
	385	if last_line[-1] == '\n':
	386	yield last_line
	387	last_line = None
	388	if last_line is not None:
	389	yield last_line
1551.12.1 by Aaron Bentley Basic RIO patch-compatible format is working	390
1551.12.23 by Aaron Bentley Indent line continuations in Patch-RIO, to improve readability	391
def read_patch_stanza(line_iter):
    """Convert an iterable of RIO-Patch format lines into a Stanza.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the
    removal of trailing whitespace, yet is also reasonably easy to read.

    The lines are first decoded by _patch_stanza_iter and the result is
    then parsed by read_stanza.

    :param line_iter: Iterable of RIO-Patch lines.
    :return: whatever read_stanza returns for the decoded lines
    """
    plain_lines = _patch_stanza_iter(line_iter)
    return read_stanza(plain_lines)