~bzr-pqm/bzr/bzr.dev

1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
1
# Copyright (C) 2006 by Canonical Ltd
2
# -*- coding: utf-8 -*-
1685.1.78 by Wouter van Heyst
more code cleanup
3
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
1685.1.78 by Wouter van Heyst
more code cleanup
8
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
1685.1.78 by Wouter van Heyst
more code cleanup
13
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
1685.1.76 by Wouter van Heyst
codecleanup
18
"""Adapter for running test cases against multiple encodings."""
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
19
20
from copy import deepcopy
21
22
from bzrlib.tests import TestSuite
23
1685.1.76 by Wouter van Heyst
codecleanup
24
1685.1.3 by John Arbash Meinel
Minor cleanups
25
# prefix for micro (1/1000000)
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
26
_mu = u'\xb5'
27
1711.4.7 by John Arbash Meinel
Adding cp437, which is my default encoding on windows
28
# greek letter omega, not to be confused with
29
# the Ohm sign, u'\u2126'. Though they are probably identical
30
# cp437 can handle the first, but not the second
31
_omega = u'\u03a9'
32
33
# smallest error possible, epsilon
34
# cp437 handles u03b5, but not u2208 the 'element of' operator
35
_epsilon = u'\u03b5'
36
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
37
# Swedish?
38
_erik = u'Erik B\xe5gfors'
39
40
# Swedish 'räksmörgås' means shrimp sandwich
41
_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'
42
43
# Arabic, probably only Unicode encodings can handle this one
44
_juju = u'\u062c\u0648\u062c\u0648'
45
46
# iso-8859-1 alternative for juju
47
_juju_alt = u'j\xfbj\xfa'
48
49
# Russian, 'Alexander' in russian
50
_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
1711.4.3 by John Arbash Meinel
Alexander recommended a better short russian string.
51
# The word 'test' in Russian
52
_russian_test = u'\u0422\u0435\u0441\u0442'
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
53
54
# Kanji
55
# It is a kanji sequence for nihonjin, or Japanese in English.
56
# 
57
# '\u4eba' being person, '\u65e5' sun and '\u672c' origin. I.e.,
58
# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
59
# not a fluent speaker, so this is just my crude breakdown.
60
# 
61
# Wouter van Heyst
62
_nihonjin = u'\u65e5\u672c\u4eba'
63
64
# Czech
65
# It's what is usually used for showing how fonts look, because it contains
66
# most accented characters, i.e. in places where Englishmen use 'Quick brown fox
67
# jumped over a lazy dog'. The literal translation of the Czech version would
68
# be something like 'Yellow horse groaned devilish codes'. Actually originally
69
# the last word used to be 'ódy' (odes). The 'k' was added as a pun when using
70
# the sentence to check whether one has properly set the encoding.
71
_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
72
                 u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
1185.85.72 by John Arbash Meinel
Fix some of the tests.
73
_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
74
_someone = u'Some\u016f\u0148\u011b'
75
_something = u'\u0165ou\u010dk\xfd'
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
76
1185.85.79 by John Arbash Meinel
Adding Hebrew characters.
77
# Hebrew
1185.85.82 by John Arbash Meinel
Fixing translation of Hebrew word.
78
# Shalom -> 'hello' or 'peace', used as a common greeting
79
_shalom = u'\u05e9\u05dc\u05d5\u05dd'
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
80
1685.1.3 by John Arbash Meinel
Minor cleanups
81
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
82
class EncodingTestAdapter(object):
    """Expand one test into a suite with one copy per configured encoding.

    This is similar to bzrlib.transport.TransportTestProviderAdapter.
    For every entry in ``_encodings`` the test is deep-copied, and the copy
    receives the encoding name (as ``encoding``) and the dictionary of
    sample strings for that encoding (as ``info``).  Each copy also gets
    its own id() so the individual variants are easy to tell apart.
    """

    # Each entry is (encoding name, permutation number, sample strings).
    # The same encoding may be listed more than once; a non-zero
    # permutation number is included in the generated test id, while 0
    # leaves the id with only the encoding name.
    _encodings = [
        # Permutation 1 of utf-8
        ('utf-8', 1, {
            'committer': _erik,
            'message': _yellow_horse,
            'filename': _shrimp_sandwich,
            'directory': _nihonjin,
        }),
        # Permutation 2 of utf-8
        ('utf-8', 2, {
            'committer': _alexander,
            'message': u'Testing ' + _mu,
            'filename': _shalom,
            'directory': _juju,
        }),
        ('iso-8859-1', 0, {
            'committer': _erik,
            'message': u'Testing ' + _mu,
            'filename': _juju_alt,
            'directory': _shrimp_sandwich,
        }),
        ('iso-8859-2', 0, {
            'committer': _someone,
            'message': _yellow_horse,
            'filename': _yellow,
            'directory': _something,
        }),
        ('cp1251', 0, {
            'committer': _alexander,
            'message': u'Testing ' + _mu,
            'filename': _russian_test,
            'directory': _russian_test + 'dir',
        }),
        # The iso-8859-1 tests run on a default windows cp437 installation
        # and it takes a long time to run an extra permutation of the tests.
        # But just in case we want to add this back in:
        # ('cp437', 0, {
        #     'committer': _erik,
        #     'message': u'Testing ' + _mu,
        #     'filename': 'file_' + _omega,
        #     'directory': _epsilon + '_dir',
        # }),
    ]

    def adapt(self, test):
        """Return a TestSuite holding one copy of `test` per encoding entry.

        :param test: The test to copy; it must provide an id() method.
        :return: A TestSuite with one deep copy of `test` for each entry of
            ``_encodings``, in the same order.
        """
        def constant_id(value):
            # A factory is used so that every copy keeps its own id instead
            # of all of them sharing the loop's last value.
            return lambda: value

        suite = TestSuite()
        for encoding, count, info in self._encodings:
            variant = deepcopy(test)
            variant.encoding = encoding
            variant.info = info
            # Read the id of the copy before its id() is replaced below.
            base_id = variant.id()
            if count:
                variant_id = "%s(%s,%s)" % (base_id, encoding, count)
            else:
                variant_id = "%s(%s)" % (base_id, encoding)
            variant.id = constant_id(variant_id)
            suite.addTest(variant)
        return suite
138
139