~bzr-pqm/bzr/bzr.dev

1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
1
# Copyright (C) 2005 by Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Test that various operations work in a non-ASCII environment."""
18
19
import os
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
20
import sys
21
from unicodedata import normalize
1185.16.70 by Martin Pool
- improved handling of non-ascii branch names and test
22
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
23
from bzrlib import osutils
24
from bzrlib.osutils import pathjoin
25
from bzrlib.tests import TestCase, TestCaseWithTransport, TestSkipped
1534.4.26 by Robert Collins
Move working tree initialisation out from Branch.initialize, deprecated Branch.initialize to Branch.create.
26
from bzrlib.workingtree import WorkingTree
27
28
29
class NonAsciiTest(TestCaseWithTransport):
    """Check that working-tree operations work in a non-ASCII branch path."""

    def test_add_in_nonascii_branch(self):
        """Test adding in a non-ASCII branch."""
        br_dir = u"\u1234"
        try:
            wt = self.make_branch_and_tree(br_dir)
        except UnicodeEncodeError:
            # Creating the branch directory failed to encode the name, so
            # skip instead of failing.  Raising ends the test here; no
            # further statement is needed after it.
            raise TestSkipped("filesystem can't accomodate nonascii names")
        # Close the handle deterministically instead of relying on garbage
        # collection, so the content is flushed before it is added.
        f = open(pathjoin(br_dir, "a"), "w")
        try:
            f.write("hello")
        finally:
            f.close()
        wt.add(["a"], ["a-id"])
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
41
42
43
# Sample characters used by the normalization tests below.  Each pair holds
# the composed ("_c", single code point) and decomposed ("_d", base letter
# followed by a combining mark) spelling of the same character.

# LATIN SMALL LETTER A WITH RING ABOVE / 'a' + COMBINING RING ABOVE
a_circle_c, a_circle_d = u'\xe5', u'a\u030a'
# LATIN SMALL LETTER A WITH DIAERESIS / 'a' + COMBINING DIAERESIS
a_dots_c, a_dots_d = u'\xe4', u'a\u0308'
# LATIN CAPITAL LETTER Z WITH CARON / 'Z' + COMBINING CARON
# (despite the "umlat" in the name, the mark is a caron, not an umlaut)
z_umlat_c, z_umlat_d = u'\u017d', u'Z\u030c'
49
50
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
51
class TestNormalization(TestCase):
    """Verify that we have our normalizations correct."""

    def test_normalize(self):
        # For every sample character, NFKD of the composed form must give
        # the decomposed form, and NFKC of the decomposed form must give
        # the composed form back.
        samples = [
            (a_circle_c, a_circle_d),
            (a_dots_c, a_dots_d),
            (z_umlat_c, z_umlat_d),
        ]
        for composed, decomposed in samples:
            self.assertEqual(decomposed, normalize('NFKD', composed))
            self.assertEqual(composed, normalize('NFKC', decomposed))
61
62
1830.3.2 by John Arbash Meinel
normalized_filename is a much better name
63
class NormalizedFilename(TestCaseWithTransport):
64
    """Test normalized_filename and associated helpers"""
65
66
    def test__accessible_normalized_filename(self):
        # The "accessible" implementation is expected to report the composed
        # name with can_access=True for both spellings of each character.
        check = osutils._accessible_normalized_filename
        # normalized_filename should allow plain ascii strings
        # not just unicode strings
        self.assertEqual((u'ascii', True), check('ascii'))
        samples = [
            (a_circle_c, a_circle_d),
            (a_dots_c, a_dots_d),
            (z_umlat_c, z_umlat_d),
        ]
        for composed, decomposed in samples:
            self.assertEqual((composed, True), check(composed))
            self.assertEqual((composed, True), check(decomposed))
77
78
    def test__inaccessible_normalized_filename(self):
        # The "inaccessible" implementation is expected to return the
        # composed name in every case, with can_access=True only when the
        # input was already composed.
        check = osutils._inaccessible_normalized_filename
        # normalized_filename should allow plain ascii strings
        # not just unicode strings
        self.assertEqual((u'ascii', True), check('ascii'))
        samples = [
            (a_circle_c, a_circle_d),
            (a_dots_c, a_dots_d),
            (z_umlat_c, z_umlat_d),
        ]
        for composed, decomposed in samples:
            self.assertEqual((composed, True), check(composed))
            self.assertEqual((composed, False), check(decomposed))
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
89
90
    def test_functions(self):
        # osutils.normalized_filename must be the implementation that
        # matches what osutils.normalizes_filenames() reports.
        if osutils.normalizes_filenames():
            expected_impl = osutils._accessible_normalized_filename
        else:
            expected_impl = osutils._inaccessible_normalized_filename
        self.assertEqual(osutils.normalized_filename, expected_impl)
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
97
98
    def test_platform(self):
        # Create files with composed names and check how the directory
        # listing reports them back.
        #
        # With FAT32 and certain encodings on win32
        # a_circle_c and a_dots_c actually map to the same file
        # adding a suffix kicks in the 'preserving but insensitive'
        # route, and maintains the right files
        files = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3']
        try:
            self.build_tree(files)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        # On darwin the listing is expected in decomposed form; everywhere
        # else the names should come back exactly as they were created.
        if sys.platform != 'darwin':
            expected = sorted(files)
        else:
            expected = sorted([a_circle_d+'.1', a_dots_d+'.2', z_umlat_d+'.3'])

        present = os.listdir(u'.')
        present.sort()
        self.assertEqual(expected, present)
116
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
117
    def test_access_normalized(self):
        # We should always be able to access files created with
        # normalized filenames
        #
        # With FAT32 and certain encodings on win32
        # a_circle_c and a_dots_c actually map to the same file
        # adding a suffix kicks in the 'preserving but insensitive'
        # route, and maintains the right files
        files = [a_circle_c+'.1', a_dots_c+'.2', z_umlat_c+'.3']
        try:
            self.build_tree(files)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        for fname in files:
            path, can_access = osutils.normalized_filename(fname)

            # An already-normalized name must come back unchanged and be
            # reported as accessible.
            self.assertEqual(path, fname)
            self.assertTrue(can_access)

            # Opening raises if the file cannot be reached at this
            # location, which fails the test.
            shouldbe = 'contents of %s%s' % (path.encode('utf8'),
                                             os.linesep)
            f = open(path, 'rb')
            try:
                actual = f.read()
            finally:
                f.close()

            # Check the contents
            failure_msg = ('contents of %s is incorrect: %r != %r'
                           % (path, shouldbe, actual))
            self.assertEqual(shouldbe, actual, failure_msg)
1185.85.75 by John Arbash Meinel
Adding bzrlib.osutils.unicode_filename to handle unicode normalization for file paths.
149
1830.3.1 by John Arbash Meinel
Change the return value of unicode_filename, and make it testable on all platforms
150
    def test_access_non_normalized(self):
        # Sometimes we can access non-normalized files by their normalized
        # path, verify that normalized_filename returns the right info
        files = [a_circle_d+'.1', a_dots_d+'.2', z_umlat_d+'.3']

        try:
            self.build_tree(files)
        except UnicodeError:
            raise TestSkipped("filesystem cannot create unicode files")

        for fname in files:
            path, can_access = osutils.normalized_filename(fname)

            # The input was not normalized, so the returned path must
            # differ from it.
            self.assertNotEqual(path, fname)

            # We should always be able to access them from the name
            # they were created with; open() raises if we cannot.
            created = open(fname, 'rb')
            created.close()

            # And normalized_filename should tell us correctly if we can
            # access them by an alternate name
            if not can_access:
                self.assertRaises(IOError, open, path, 'rb')
            else:
                alternate = open(path, 'rb')
                alternate.close()