~bzr-pqm/bzr/bzr.dev

2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2005 Canonical Ltd
1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Test that various operations work in a non-ASCII environment."""
18
19
import os
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
20
import sys
21
from unicodedata import normalize
1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
22
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
23
from bzrlib import osutils
24
from bzrlib.osutils import pathjoin
25
from bzrlib.tests import TestCase, TestCaseWithTransport, TestSkipped
1534.4.26 by Robert Collins
Move working tree initialisation out from Branch.initialize, deprecated Branch.initialize to Branch.create.
26
from bzrlib.workingtree import WorkingTree
27
28
29
class NonAsciiTest(TestCaseWithTransport):
    """Exercise working-tree operations inside a non-ASCII directory."""

    def test_add_in_nonascii_branch(self):
        """Test adding in a non-ASCII branch."""
        br_dir = u"\u1234"
        try:
            wt = self.make_branch_and_tree(br_dir)
        except UnicodeEncodeError:
            # The directory name could not be encoded, so there is nothing
            # meaningful to test here.  Raising is sufficient; no statement
            # after the raise would ever run.
            raise TestSkipped("filesystem can't accomodate nonascii names")
        # Close the handle explicitly so the content is flushed to disk
        # before the file is added, rather than relying on garbage
        # collection of an anonymous file object.
        f = open(pathjoin(br_dir, "a"), "w")
        try:
            f.write("hello")
        finally:
            f.close()
        wt.add(["a"], ["a-id"])
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
41
42
43
# Sample characters used by the normalization tests.  The ``_c`` suffix is
# the composed (NFC) spelling, ``_d`` the decomposed (NFD) spelling.
a_circle_c = u'\xe5'
a_circle_d = u'a\u030a'
a_dots_c = u'\xe4'
a_dots_d = u'a\u0308'
# NOTE: despite the name, this is Z with caron (U+017D / combining U+030C).
z_umlat_c = u'\u017d'
z_umlat_d = u'Z\u030c'
# The next two characters have no canonical decomposition, so their NFC and
# NFD forms are identical; only the compatibility forms NFK[CD] alter them.
squared_c = u'\xb2' # This gets mapped to '2' if we use NFK[CD]
squared_d = u'\xb2'
quarter_c = u'\xbc' # Gets mapped to u'1\u20444' (1/4) if we use NFK[CD]
quarter_d = u'\xbc'
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
53
54
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
55
class TestNormalization(TestCase):
    """Check that the sample constants really are the forms they claim."""

    def test_normalize(self):
        # Characters with a canonical decomposition: NFD of the composed
        # form gives the decomposed one, and NFC goes back again.
        decomposable = [
            (a_circle_c, a_circle_d),
            (a_dots_c, a_dots_d),
            (z_umlat_c, z_umlat_d),
        ]
        for composed, decomposed in decomposable:
            self.assertEqual(decomposed, normalize('NFD', composed))
            self.assertEqual(composed, normalize('NFC', decomposed))

        # Characters that NFC/NFD are expected to leave untouched.
        stable = [
            (squared_c, squared_d),
            (quarter_c, quarter_d),
        ]
        for composed, decomposed in stable:
            self.assertEqual(decomposed, normalize('NFC', composed))
            self.assertEqual(composed, normalize('NFD', decomposed))
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
69
70
1830.3.2 by John Arbash Meinel
normalized_filename is a much better name
71
class NormalizedFilename(TestCaseWithTransport):
72
    """Test normalized_filename and associated helpers"""
73
74
    def test__accessible_normalized_filename(self):
        # Checks osutils._accessible_normalized_filename directly: every
        # input below is expected to come back in composed form and be
        # flagged as accessible.
        normalize_name = osutils._accessible_normalized_filename

        # A plain ascii byte string must be accepted too, not only unicode
        # strings; the result is compared against a unicode value.
        self.assertEqual((u'ascii', True), normalize_name('ascii'))

        # (expected normalized name, name passed in)
        cases = [
            (a_circle_c, a_circle_c),
            (a_circle_c, a_circle_d),
            (a_dots_c, a_dots_c),
            (a_dots_c, a_dots_d),
            (z_umlat_c, z_umlat_c),
            (z_umlat_c, z_umlat_d),
            (squared_c, squared_c),
            (squared_c, squared_d),
            (quarter_c, quarter_c),
            (quarter_c, quarter_d),
        ]
        for expected, given in cases:
            self.assertEqual((expected, True), normalize_name(given))
1830.3.2 by John Arbash Meinel
normalized_filename is a much better name
89
90
    def test__inaccessible_normalized_filename(self):
        # Checks osutils._inaccessible_normalized_filename directly: the
        # composed name is always returned, but the accessible flag is
        # expected to be False whenever normalization changed the input.
        normalize_name = osutils._inaccessible_normalized_filename

        # A plain ascii byte string must be accepted too, not only unicode
        # strings; the result is compared against a unicode value.
        self.assertEqual((u'ascii', True), normalize_name('ascii'))

        # (expected normalized name, expected accessible flag, name passed in)
        cases = [
            (a_circle_c, True, a_circle_c),
            (a_circle_c, False, a_circle_d),
            (a_dots_c, True, a_dots_c),
            (a_dots_c, False, a_dots_d),
            (z_umlat_c, True, z_umlat_c),
            (z_umlat_c, False, z_umlat_d),
            (squared_c, True, squared_c),
            (squared_c, True, squared_d),
            (quarter_c, True, quarter_c),
            (quarter_c, True, quarter_d),
        ]
        for expected, accessible, given in cases:
            self.assertEqual((expected, accessible), normalize_name(given))
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
105
106
    def test_functions(self):
        # osutils.normalized_filename must be bound to the helper that
        # matches what osutils.normalizes_filenames() reports.
        if osutils.normalizes_filenames():
            expected_helper = osutils._accessible_normalized_filename
        else:
            expected_helper = osutils._inaccessible_normalized_filename
        self.assertEqual(osutils.normalized_filename, expected_helper)
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
113
114
    def test_platform(self):
        # Create files with composed names and check how the platform
        # reports them back from a directory listing.
        #
        # With FAT32 and certain encodings on win32, a_circle_c and
        # a_dots_c actually map to the same file.  Adding a distinct suffix
        # kicks in the 'preserving but insensitive' route and keeps the
        # files apart.
        composed_names = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3']
        try:
            self.build_tree(composed_names)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        if sys.platform != 'darwin':
            # Everywhere else the names should come back as they were given.
            wanted = list(composed_names)
        else:
            # On darwin the listing is expected in decomposed form.
            wanted = [a_circle_d+'.1', a_dots_d+'.2', z_umlat_d+'.3']
        wanted.sort()

        found = os.listdir(u'.')
        found.sort()
        self.assertEqual(wanted, found)
132
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
133
    def test_access_normalized(self):
        # Files created with normalized filenames must always be reachable
        # through the path that normalized_filename returns.
        #
        # With FAT32 and certain encodings on win32, a_circle_c and
        # a_dots_c actually map to the same file.  Adding a distinct suffix
        # kicks in the 'preserving but insensitive' route and keeps the
        # files apart.
        names = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3',
                 squared_c+'.4', quarter_c+'.5']
        try:
            self.build_tree(names, line_endings='native')
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        for name in names:
            normalized, accessible = osutils.normalized_filename(name)

            # Already-normalized names must come back unchanged and usable.
            self.assertEqual(normalized, name)
            self.assertTrue(accessible)

            # Expected content: the utf8-encoded path plus the native line
            # ending, matching the line_endings='native' request above.
            wanted = 'contents of %s%s' % (normalized.encode('utf8'),
                                           os.linesep)

            # open() raises if the file cannot be reached at this location,
            # which fails the test.
            handle = open(normalized, 'rb')
            try:
                found = handle.read()
            finally:
                handle.close()

            # %r keeps the failure message safe for unicode paths.
            self.assertEqual(wanted, found,
                             'contents of %r is incorrect: %r != %r'
                             % (normalized, wanted, found))
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
166
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
167
    def test_access_non_normalized(self):
        # Sometimes non-normalized files can be reached through their
        # normalized path; verify that normalized_filename reports
        # correctly whether that is the case.
        names = [a_circle_d+'.1', a_dots_d+'.2', z_umlat_d+'.3']

        try:
            self.build_tree(names)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        for created in names:
            normalized, accessible = osutils.normalized_filename(created)

            # The inputs are decomposed, so normalization must change them.
            self.assertNotEqual(normalized, created)

            # The name a file was created with must always work; open()
            # raises (failing the test) if it does not.
            open(created, 'rb').close()

            # And normalized_filename should tell us correctly whether the
            # alternate (normalized) name can be used as well.
            if not accessible:
                self.assertRaises(IOError, open, normalized, 'rb')
            else:
                open(normalized, 'rb').close()