~bzr-pqm/bzr/bzr.dev

5557.1.15 by John Arbash Meinel
Merge bzr.dev 5597 to resolve NEWS, aka bzr-2.3.txt
1
# Copyright (C) 2005, 2006, 2008, 2009, 2011 Canonical Ltd
1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
16
17
"""Test that various operations work in a non-ASCII environment."""
18
19
import os
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
20
import sys
21
from unicodedata import normalize
1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
22
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
23
from bzrlib import osutils
24
from bzrlib.osutils import pathjoin
25
from bzrlib.tests import TestCase, TestCaseWithTransport, TestSkipped
1534.4.26 by Robert Collins
Move working tree initialisation out from Branch.initialize, deprecated Branch.initialize to Branch.create.
26
27
28
class NonAsciiTest(TestCaseWithTransport):
    """Exercise working-tree operations inside a non-ASCII directory."""

    def test_add_in_nonascii_branch(self):
        """Test adding in a non-ASCII branch."""
        br_dir = u"\u1234"
        try:
            wt = self.make_branch_and_tree(br_dir)
        except UnicodeEncodeError:
            # The directory name could not be encoded for this platform, so
            # the rest of the test cannot run here.  Raising is enough to
            # leave the test; no trailing ``return`` is needed.
            raise TestSkipped("filesystem can't accomodate nonascii names")
        # ``open`` is the documented spelling of the Python 2 ``file``
        # builtin and behaves identically for this use.
        with open(pathjoin(br_dir, "a"), "w") as f:
            f.write("hello")
        wt.add(["a"], ["a-id"])
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
40
41
42
# Sample characters used by the tests below.  For each pair, ``*_c`` is the
# composed (NFC) spelling and ``*_d`` is the decomposed (NFD) spelling.
a_circle_c = u'\xe5'
a_circle_d = u'a\u030a'
a_dots_c = u'\xe4'
a_dots_d = u'a\u0308'
# NOTE: despite the name, U+017D is Z with caron, not Z with an umlaut; the
# name is kept because other code refers to it.
z_umlat_c = u'\u017d'
z_umlat_d = u'Z\u030c'
# The two characters below only have *compatibility* decompositions, so NFC
# and NFD leave them untouched while NFKC/NFKD would rewrite them.  That is
# why the composed and decomposed spellings are the same string.
squared_c = u'\xb2' # This gets mapped to '2' if we use NFK[CD]
squared_d = u'\xb2'
quarter_c = u'\xbc' # Gets mapped to u'1\u20444' (1/4) if we use NFK[CD]
quarter_d = u'\xbc'
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
52
53
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
54
class TestNormalization(TestCase):
    """Sanity-check the module-level sample strings against unicodedata."""

    def test_normalize(self):
        # Each row is (expected result, normalization form, input string).
        # Rows are checked in order, so the first mismatch is the one
        # reported.
        checks = [
            (a_circle_d, 'NFD', a_circle_c),
            (a_circle_c, 'NFC', a_circle_d),
            (a_dots_d, 'NFD', a_dots_c),
            (a_dots_c, 'NFC', a_dots_d),
            (z_umlat_d, 'NFD', z_umlat_c),
            (z_umlat_c, 'NFC', z_umlat_d),
            (squared_d, 'NFC', squared_c),
            (squared_c, 'NFD', squared_d),
            (quarter_d, 'NFC', quarter_c),
            (quarter_c, 'NFD', quarter_d),
        ]
        for wanted, form, text in checks:
            self.assertEqual(wanted, normalize(form, text))
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
68
69
1830.3.2 by John Arbash Meinel
normalized_filename is a much better name
70
class NormalizedFilename(TestCaseWithTransport):
    """Test normalized_filename and associated helpers"""

    def test__accessible_normalized_filename(self):
        normalizer = osutils._accessible_normalized_filename
        # Plain ascii byte strings must be accepted, not just unicode ones.
        self.assertEqual((u'ascii', True), normalizer('ascii'))
        # Each row is (expected normalized name, name passed in); every
        # input is expected to be reported as accessible.
        cases = [
            (a_circle_c, a_circle_c),
            (a_circle_c, a_circle_d),
            (a_dots_c, a_dots_c),
            (a_dots_c, a_dots_d),
            (z_umlat_c, z_umlat_c),
            (z_umlat_c, z_umlat_d),
            (squared_c, squared_c),
            (squared_c, squared_d),
            (quarter_c, quarter_c),
            (quarter_c, quarter_d),
        ]
        for wanted, given in cases:
            self.assertEqual((wanted, True), normalizer(given))

    def test__inaccessible_normalized_filename(self):
        normalizer = osutils._inaccessible_normalized_filename
        # Plain ascii byte strings must be accepted, not just unicode ones.
        self.assertEqual((u'ascii', True), normalizer('ascii'))
        # Each row is (expected normalized name, name passed in, expected
        # accessibility flag).  The flag is False only where the input
        # differs from its normalized form.
        cases = [
            (a_circle_c, a_circle_c, True),
            (a_circle_c, a_circle_d, False),
            (a_dots_c, a_dots_c, True),
            (a_dots_c, a_dots_d, False),
            (z_umlat_c, z_umlat_c, True),
            (z_umlat_c, z_umlat_d, False),
            (squared_c, squared_c, True),
            (squared_c, squared_d, True),
            (quarter_c, quarter_c, True),
            (quarter_c, quarter_d, True),
        ]
        for wanted, given, accessible in cases:
            self.assertEqual((wanted, accessible), normalizer(given))

    def test_functions(self):
        # normalized_filename must be bound to the helper that matches what
        # normalizes_filenames() reports for this platform.
        if osutils.normalizes_filenames():
            helper = osutils._accessible_normalized_filename
        else:
            helper = osutils._inaccessible_normalized_filename
        self.assertEqual(osutils.normalized_filename, helper)

    def test_platform(self):
        # On win32 with FAT32 and certain encodings, a_circle_c and a_dots_c
        # map to the same file.  Distinct suffixes trigger the
        # 'preserving but insensitive' route and keep the files separate.
        files = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3']
        try:
            self.build_tree(files)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        # On darwin the directory listing is expected to come back in
        # decomposed form; everywhere else the names are expected unchanged.
        if sys.platform == 'darwin':
            expected = [a_circle_d+'.1', a_dots_d+'.2', z_umlat_d+'.3']
        else:
            expected = list(files)
        expected.sort()

        present = os.listdir(u'.')
        present.sort()
        self.assertEqual(expected, present)

    def test_access_normalized(self):
        # Files created under a normalized name must always be reachable.
        # On win32 with FAT32 and certain encodings, a_circle_c and a_dots_c
        # map to the same file.  Distinct suffixes trigger the
        # 'preserving but insensitive' route and keep the files separate.
        files = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3',
                 squared_c+'.4', quarter_c+'.5']
        try:
            self.build_tree(files, line_endings='native')
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        for fname in files:
            path, can_access = osutils.normalized_filename(fname)

            # Already-normalized names must come back unchanged and be
            # flagged as accessible.
            self.assertEqual(path, fname)
            self.assertTrue(can_access)

            # Opening raises if the file cannot be found at this location.
            with open(path, 'rb') as f:
                # Check the contents
                shouldbe = 'contents of %s%s' % (path.encode('utf8'),
                                                 os.linesep)
                actual = f.read()
            self.assertEqual(shouldbe, actual,
                             'contents of %r is incorrect: %r != %r'
                             % (path, shouldbe, actual))

    def test_access_non_normalized(self):
        # Sometimes non-normalized files can be reached through their
        # normalized path; verify that normalized_filename reports this
        # correctly.
        files = [a_circle_d+'.1', a_dots_d+'.2', z_umlat_d+'.3']

        try:
            self.build_tree(files)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        for fname in files:
            path, can_access = osutils.normalized_filename(fname)

            # The decomposed input must not be returned as-is.
            self.assertNotEqual(path, fname)

            # The name the file was created with must always work; open()
            # raises if it does not.
            open(fname, 'rb').close()

            # And normalized_filename should tell us correctly whether the
            # alternate (normalized) name works too.
            if can_access:
                open(path, 'rb').close()
            else:
                self.assertRaises(IOError, open, path, 'rb')