~bzr-pqm/bzr/bzr.dev : contents of bzrlib/tests/EncodingAdapter.py at revision 1711.8.7

~bzr-pqm/bzr/bzr.dev : (revision 1711.8.7)

1185.85.69 by John Arbash Meinel New encoder with multiple strings.	1	# Copyright (C) 2006 by Canonical Ltd
	2	# -- coding: utf-8 --
1685.1.78 by Wouter van Heyst more code cleanup	3	#
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	4	# This program is free software; you can redistribute it and/or modify
	5	# it under the terms of the GNU General Public License as published by
	6	# the Free Software Foundation; either version 2 of the License, or
	7	# (at your option) any later version.
1685.1.78 by Wouter van Heyst more code cleanup	8	#
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU General Public License for more details.
1685.1.78 by Wouter van Heyst more code cleanup	13	#
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	14	# You should have received a copy of the GNU General Public License
	15	# along with this program; if not, write to the Free Software
	16	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	17
1685.1.76 by Wouter van Heyst codecleanup	18	"""Adapter for running test cases against multiple encodings."""
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	19
	20	from copy import deepcopy
	21
	22	from bzrlib.tests import TestSuite
	23
1685.1.76 by Wouter van Heyst codecleanup	24
1685.1.3 by John Arbash Meinel Minor cleanups	25	# prefix for micro (1/1000000)
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	26	_mu = u'\xb5'
	27
1711.4.7 by John Arbash Meinel Adding cp437, which is my default encoding on windows	28	# greek letter omega, not to be confused with
	29	# the Ohm sign, u'\u2126'. Though they are probably identical,
	30	# cp437 can handle the first, but not the second
	31	_omega = u'\u03a9'
	32
	33	# smallest error possible, epsilon
	34	# cp437 handles u03b5, but not u2208 the 'element of' operator
	35	_epsilon = u'\u03b5'
	36
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	37	# Swedish?
	38	_erik = u'Erik B\xe5gfors'
	39
	40	# Swedish 'räksmörgås' means shrimp sandwich
	41	_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'
	42
	43	# Arabic, probably only Unicode encodings can handle this one
	44	_juju = u'\u062c\u0648\u062c\u0648'
	45
	46	# iso-8859-1 alternative for juju
	47	_juju_alt = u'j\xfbj\xfa'
	48
	49	# Russian, 'Alexander' in russian
	50	_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
1711.4.3 by John Arbash Meinel Alexander recommended a better short russian string.	51	# The word 'test' in Russian
	52	_russian_test = u'\u0422\u0435\u0441\u0442'
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	53
	54	# Kanji
	55	# It is a kanji sequence for nihonjin, or Japanese in English.
	56	#
	57	# '\u4eba' being person, '\u65e5' sun and '\u672c' origin. Ie,
	58	# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
	59	# not a fluent speaker, so this is just my crude breakdown.
	60	#
	61	# Wouter van Heyst
	62	_nihonjin = u'\u65e5\u672c\u4eba'
	63
	64	# Czech
	65	# It's what is usually used for showing how fonts look, because it contains
	66	# most accented characters, ie. in places where Englishmen use 'Quick brown fox
	67	# jumped over a lazy dog'. The literal translation of the Czech version would
	68	# be something like 'Yellow horse groaned devilish codes'. Actually originally
	69	# the last word used to be 'ódy' (odes). The 'k' was added as a pun when using
	70	# the sentence to check whether one has properly set the encoding.
	71	_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
	72	u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
1185.85.72 by John Arbash Meinel Fix some of the tests.	73	_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
	74	_someone = u'Some\u016f\u0148\u011b'
	75	_something = u'\u0165ou\u010dk\xfd'
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	76
1185.85.79 by John Arbash Meinel Adding Hebrew characters.	77	# Hebrew
1185.85.82 by John Arbash Meinel Fixing translation of Hebrew word.	78	# Shalom -> 'hello' or 'peace', used as a common greeting
	79	_shalom = u'\u05e9\u05dc\u05d5\u05dd'
1185.85.69 by John Arbash Meinel New encoder with multiple strings.	80
1685.1.3 by John Arbash Meinel Minor cleanups	81
class EncodingTestAdapter(object):
    """Build a suite that runs one test under several encodings.

    Modelled on bzrlib.transport.TransportTestProviderAdapter: the test
    handed to adapt() is deep-copied once per entry of ``_encodings``.
    Every copy receives the encoding name (``encoding``), the dict of
    sample strings for that encoding (``info``), and a replacement id()
    that carries the encoding name so the copies can be told apart.
    """

    # Each entry is (encoding name, permutation number, sample strings).
    # A permutation number of 0 means the encoding is listed only once,
    # and no number is appended to the generated test id.
    _encodings = [
        # Permutation 1 of utf-8
        ('utf-8', 1, {
            'committer': _erik,
            'message': _yellow_horse,
            'filename': _shrimp_sandwich,
            'directory': _nihonjin,
        }),
        # Permutation 2 of utf-8
        ('utf-8', 2, {
            'committer': _alexander,
            'message': u'Testing ' + _mu,
            'filename': _shalom,
            'directory': _juju,
        }),
        ('iso-8859-1', 0, {
            'committer': _erik,
            'message': u'Testing ' + _mu,
            'filename': _juju_alt,
            'directory': _shrimp_sandwich,
        }),
        ('iso-8859-2', 0, {
            'committer': _someone,
            'message': _yellow_horse,
            'filename': _yellow,
            'directory': _something,
        }),
        ('cp1251', 0, {
            'committer': _alexander,
            'message': u'Testing ' + _mu,
            'filename': _russian_test,
            'directory': _russian_test + 'dir',
        }),
        # The iso-8859-1 tests run on a default windows cp437 installation
        # and it takes a long time to run an extra permutation of the tests
        # But just in case we want to add this back in:
        # ('cp437', 0, {
        #     'committer': _erik,
        #     'message': u'Testing ' + _mu,
        #     'filename': 'file_' + _omega,
        #     'directory': _epsilon + '_dir',
        # }),
    ]

    def adapt(self, test):
        """Return a TestSuite holding one copy of ``test`` per encoding."""

        def constant(value):
            # Give every copy its own zero-argument callable that returns
            # the already-computed id string.
            return lambda: value

        suite = TestSuite()
        for encoding, count, info in self._encodings:
            clone = deepcopy(test)
            clone.encoding = encoding
            clone.info = info
            # The label is computed from the copy's original id() before
            # that method is replaced below.
            if count:
                label = "%s(%s,%s)" % (clone.id(), encoding, count)
            else:
                label = "%s(%s)" % (clone.id(), encoding)
            clone.id = constant(label)
            suite.addTest(clone)
        return suite
	138
	139