~bzr-pqm/bzr/bzr.dev

5557.1.15 by John Arbash Meinel
Merge bzr.dev 5597 to resolve NEWS, aka bzr-2.3.txt
1
# Copyright (C) 2006, 2009, 2010, 2011 Canonical Ltd
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
2
# -*- coding: utf-8 -*-
1685.1.78 by Wouter van Heyst
more code cleanup
3
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
1685.1.78 by Wouter van Heyst
more code cleanup
8
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
1685.1.78 by Wouter van Heyst
more code cleanup
13
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
17
1685.1.76 by Wouter van Heyst
codecleanup
18
"""Adapter for running test cases against multiple encodings."""
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
19
1685.1.3 by John Arbash Meinel
Minor cleanups
20
# prefix for micro (1/1000000)
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
21
_mu = u'\xb5'
22
1711.4.7 by John Arbash Meinel
Adding cp437, which is my default encoding on windows
23
# greek letter omega, not to be confused with
24
# the Ohm sign, u'\u2126'. Though they are probably identical
25
# cp437 can handle the first, but not the second
26
_omega = u'\u03a9'
27
28
# smallest error possible, epsilon
29
# cp437 handles u'\u03b5', but not u'\u2208', the 'element of' operator
30
_epsilon = u'\u03b5'
31
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
32
# Swedish?
33
_erik = u'Erik B\xe5gfors'
34
35
# Swedish 'räksmörgås' means shrimp sandwich
36
_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'
37
38
# Arabic, probably only Unicode encodings can handle this one
39
_juju = u'\u062c\u0648\u062c\u0648'
40
41
# iso-8859-1 alternative for juju
42
_juju_alt = u'j\xfbj\xfa'
43
44
# Russian, 'Alexander' in Russian
45
_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
1711.4.3 by John Arbash Meinel
Alexander recommended a better short russian string.
46
# The word 'test' in Russian
47
_russian_test = u'\u0422\u0435\u0441\u0442'
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
48
49
# Kanji
50
# It is a kanji sequence for nihonjin, or Japanese in English.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
51
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
52
# '\u4eba' being person, '\u65e5' sun and '\u672c' origin. I.e.,
53
# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
54
# not a fluent speaker, so this is just my crude breakdown.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
55
#
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
56
# Wouter van Heyst
57
_nihonjin = u'\u65e5\u672c\u4eba'
58
59
# Czech
60
# It's what is usually used for showing how fonts look, because it contains
61
# most accented characters, i.e. where Englishmen use 'Quick brown fox
62
# jumped over a lazy dog'. The literal translation of the Czech version would
63
# be something like 'Yellow horse groaned devilish codes'. Actually originally
64
# the last word used to be 'ódy' (odes). The 'k' was added as a pun when using
65
# the sentence to check whether one has properly set the encoding.
66
_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
67
                 u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
1185.85.72 by John Arbash Meinel
Fix some of the tests.
68
# First word of _yellow_horse; used as the filename in the iso-8859-2 scenario.
_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
69
# ASCII 'Some' followed by three accented letters that also occur in
# _yellow_horse; used as the committer in the iso-8859-2 scenario.
_someone = u'Some\u016f\u0148\u011b'
70
# Tail of _yellow (everything after the leading u'\u017dlu'); used as the
# directory in the iso-8859-2 scenario.
_something = u'\u0165ou\u010dk\xfd'
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
71
1185.85.79 by John Arbash Meinel
Adding Hebrew characters.
72
# Hebrew
1185.85.82 by John Arbash Meinel
Fixing translation of Hebrew word.
73
# Shalom -> 'hello' or 'peace', used as a common greeting
74
_shalom = u'\u05e9\u05dc\u05d5\u05dd'
1185.85.69 by John Arbash Meinel
New encoder with multiple strings.
75
1685.1.3 by John Arbash Meinel
Minor cleanups
76
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
77
encoding_scenarios = [
1185.85.70 by John Arbash Meinel
Hooked up EncodingAdapter, and updated test_non_ascii.
78
        # Permutation 1 of utf-8
4084.5.2 by Robert Collins
Fix mis-transcribed encoding test scenarios.
79
        ('utf-8,1', {
80
            'info': {
81
                'committer': _erik,
82
                'message': _yellow_horse,
83
                'filename': _shrimp_sandwich,
84
                'directory': _nihonjin,
85
                },
86
            'encoding': 'utf-8',
87
            }),
1185.85.70 by John Arbash Meinel
Hooked up EncodingAdapter, and updated test_non_ascii.
88
        # Permutation 2 of utf-8
4084.5.2 by Robert Collins
Fix mis-transcribed encoding test scenarios.
89
        ('utf-8,2', {
90
            'info': {
91
                'committer': _alexander,
92
                'message': u'Testing ' + _mu,
93
                'filename': _shalom,
94
                'directory': _juju,
95
                },
96
            'encoding': 'utf-8',
97
            }),
98
        ('iso-8859-1', {
99
            'info': {
100
                'committer': _erik,
101
                'message': u'Testing ' + _mu,
102
                'filename': _juju_alt,
103
                'directory': _shrimp_sandwich,
104
                },
105
            'encoding': 'iso-8859-1',
106
            }),
107
        ('iso-8859-2', {
108
            'info': {
109
                'committer': _someone,
110
                'message': _yellow_horse,
111
                'filename': _yellow,
112
                'directory': _something,
113
                },
114
            'encoding': 'iso-8859-2',
115
            }),
116
        ('cp1251', {
117
            'info': {
118
                'committer': _alexander,
119
                'message': u'Testing ' + _mu,
120
                'filename': _russian_test,
121
                'directory': _russian_test + 'dir',
122
                },
123
            'encoding': 'cp1251',
124
            }),
1711.4.12 by John Arbash Meinel
Remove cp437 from the set of encodings, it isn't strictly needed
125
# The iso-8859-1 tests run on a default windows cp437 installation
126
# and it takes a long time to run an extra permutation of the tests
127
# But just in case we want to add this back in:
4084.5.1 by Robert Collins
Bulk update all test adaptation into a single approach, using multiply_tests rather than test adapters.
128
#        ('cp437', {'committer':_erik
1711.4.12 by John Arbash Meinel
Remove cp437 from the set of encodings, it isn't strictly needed
129
#                  , 'message':u'Testing ' + _mu
130
#                  , 'filename':'file_' + _omega
4084.5.2 by Robert Collins
Fix mis-transcribed encoding test scenarios.
131
#                  , 'directory':_epsilon + '_dir',
132
#            'encoding': 'cp437'}),
1185.85.70 by John Arbash Meinel
Hooked up EncodingAdapter, and updated test_non_ascii.
133
    ]