# Copyright (C) 2005 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Tests for the urlutils wrapper."""

import sys

from bzrlib.errors import InvalidURL, InvalidURLJoin
import bzrlib.urlutils as urlutils
from bzrlib.tests import TestCaseInTempDir, TestCase


class TestUrlToPath(TestCase):
    """Exercise the URL/path helpers of bzrlib.urlutils.

    Several tests branch on ``sys.platform`` because the expected form of
    ``file://`` URLs differs between win32 (drive letter required) and
    every other platform.
    """

    def test_basename(self):
        # Test bzrlib.urlutils.basename()
        basename = urlutils.basename
        if sys.platform == 'win32':
            # The win32 expectations require a drive letter in file URLs.
            self.assertRaises(InvalidURL, basename, 'file:///path/to/foo')
            self.assertEqual('foo', basename('file:///C|/foo'))
            self.assertEqual('', basename('file:///C|/'))
        else:
            self.assertEqual('foo', basename('file:///foo'))
            self.assertEqual('', basename('file:///'))

        self.assertEqual('foo', basename('http://host/path/to/foo'))
        self.assertEqual('foo', basename('http://host/path/to/foo/'))
        self.assertEqual('',
            basename('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('path', basename('http://host/path'))
        self.assertEqual('', basename('http://host/'))
        self.assertEqual('', basename('http://host'))
        self.assertEqual('path', basename('http:///nohost/path'))

        self.assertEqual('path', basename('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('path', basename('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('', basename('random+scheme://user:pass@ahost:port/'))

        # Relative paths (no scheme)
        self.assertEqual('foo', basename('path/to/foo'))
        self.assertEqual('foo', basename('path/to/foo/'))
        self.assertEqual('', basename('path/to/foo/',
            exclude_trailing_slash=False))
        self.assertEqual('foo', basename('path/../foo'))
        self.assertEqual('foo', basename('../path/foo'))

    def test_normalize_url_files(self):
        # Test that local paths are properly normalized
        normalize_url = urlutils.normalize_url

        def norm_file(expected, path):
            """Normalize ``path`` and check the tail of the resulting URL.

            :param expected: Suffix the normalized URL must end with once
                the platform-specific ``file://`` prefix has been cut off.
            :param path: Local path handed to normalize_url().
            """
            url = normalize_url(path)
            self.assertStartsWith(url, 'file:///')
            if sys.platform == 'win32':
                # Drop the scheme together with the drive specifier.
                url = url[len('file:///C:'):]
            else:
                # Drop only 'file://' so the leading '/' of the path stays.
                url = url[len('file://'):]

            self.assertEndsWith(url, expected)

        norm_file('path/to/foo', 'path/to/foo')
        norm_file('/path/to/foo', '/path/to/foo')
        norm_file('path/to/foo', '../path/to/foo')

        # Local paths are assumed to *not* be escaped at all
        norm_file('uni/%C2%B5', u'uni/\xb5')
        norm_file('uni/%25C2%25B5', u'uni/%C2%B5')
        norm_file('uni/%20b', u'uni/ b')
        # All the crazy characters get escaped in local paths => file:/// urls
        norm_file('%27%3B/%3F%3A%40%26%3D%2B%24%2C%23%20', "';/?:@&=+$,# ")

    def test_normalize_url_hybrid(self):
        # Anything with a scheme:// should be treated as a hybrid url
        # which changes what characters get escaped.
        normalize_url = urlutils.normalize_url

        eq = self.assertEqual
        eq('file:///foo/', normalize_url(u'file:///foo/'))
        eq('file:///foo/%20', normalize_url(u'file:///foo/ '))
        eq('file:///foo/%20', normalize_url(u'file:///foo/%20'))
        # Don't escape reserved characters
        eq('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$',
            normalize_url('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$'))
        eq('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$',
            normalize_url('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$'))

        # Escape unicode characters, but not already escaped chars
        eq('http://host/ab/%C2%B5/%C2%B5',
            normalize_url(u'http://host/ab/%C2%B5/\xb5'))

        # Normalize verifies URLs when they are not unicode
        # (indicating they did not come from the user)
        self.assertRaises(InvalidURL, normalize_url, 'http://host/\xb5')
        self.assertRaises(InvalidURL, normalize_url, 'http://host/ ')

    def test_url_scheme_re(self):
        # Test paths that may be URLs
        def test_one(url, scheme_and_path):
            """Assert that _url_scheme_re correctly matches

            :param url: The string handed to _url_scheme_re.match()
            :param scheme_and_path: The (scheme, path) that should be matched
                can be None, to indicate it should not match
            """
            m = urlutils._url_scheme_re.match(url)
            if scheme_and_path is None:
                self.assertEqual(None, m)
            else:
                self.assertEqual(scheme_and_path[0], m.group('scheme'))
                self.assertEqual(scheme_and_path[1], m.group('path'))

        # Plain local paths must not match
        test_one('/path', None)
        test_one('C:/path', None)
        test_one('../path/to/foo', None)
        test_one(u'../path/to/fo\xe5', None)

        # Strings of the form scheme://rest must match
        test_one('http://host/path/', ('http', 'host/path/'))
        test_one('sftp://host/path/to/foo', ('sftp', 'host/path/to/foo'))
        test_one('file:///usr/bin', ('file', '/usr/bin'))
        test_one('file:///C:/Windows', ('file', '/C:/Windows'))
        test_one('file:///C|/Windows', ('file', '/C|/Windows'))
        test_one(u'readonly+sftp://host/path/\xe5', ('readonly+sftp', u'host/path/\xe5'))

        # Can't have slashes or colons in the scheme
        test_one('/path/to/://foo', None)
        test_one('path:path://foo', None)
        # Must have more than one character for scheme
        test_one('C://foo', None)
        test_one('ab://foo', ('ab', 'foo'))

    def test_dirname(self):
        # Test bzrlib.urlutils.dirname()
        dirname = urlutils.dirname
        if sys.platform == 'win32':
            self.assertRaises(InvalidURL, dirname, 'file:///path/to/foo')
            self.assertEqual('file:///C|/', dirname('file:///C|/foo'))
            self.assertEqual('file:///C|/', dirname('file:///C|/'))
        else:
            self.assertEqual('file:///', dirname('file:///foo'))
            self.assertEqual('file:///', dirname('file:///'))

        self.assertEqual('http://host/path/to', dirname('http://host/path/to/foo'))
        self.assertEqual('http://host/path/to', dirname('http://host/path/to/foo/'))
        self.assertEqual('http://host/path/to/foo',
            dirname('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('http://host/', dirname('http://host/path'))
        self.assertEqual('http://host/', dirname('http://host/'))
        self.assertEqual('http://host', dirname('http://host'))
        self.assertEqual('http:///nohost', dirname('http:///nohost/path'))

        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/'))

        # Relative paths (no scheme)
        self.assertEqual('path/to', dirname('path/to/foo'))
        self.assertEqual('path/to', dirname('path/to/foo/'))
        self.assertEqual('path/to/foo',
            dirname('path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('path/..', dirname('path/../foo'))
        self.assertEqual('../path', dirname('../path/foo'))

    def test_join(self):
        # Test bzrlib.urlutils.join()
        def test(expected, *args):
            """Join ``args`` with urlutils.join() and compare to ``expected``."""
            joined = urlutils.join(*args)
            self.assertEqual(expected, joined)

        # Test a single element
        test('foo', 'foo')

        # Test relative path joining
        test('foo/bar', 'foo', 'bar')
        test('http://foo/bar', 'http://foo', 'bar')
        test('http://foo/bar', 'http://foo', '.', 'bar')
        test('http://foo/baz', 'http://foo', 'bar', '../baz')
        test('http://foo/bar/baz', 'http://foo', 'bar/baz')
        test('http://foo/baz', 'http://foo', 'bar/../baz')

        # An argument that is itself a full URL replaces what came before it
        test('http://bar', 'http://foo', 'http://bar')
        test('sftp://bzr/foo', 'http://foo', 'bar', 'sftp://bzr/foo')
        test('file:///bar', 'foo', 'file:///bar')

        # Cannot go above root
        self.assertRaises(InvalidURLJoin, urlutils.join,
            'http://foo', '../baz')

    def test_function_type(self):
        # The public local_path_* names must be bound to the implementation
        # matching the running platform.
        if sys.platform == 'win32':
            self.assertEqual(urlutils._win32_local_path_to_url, urlutils.local_path_to_url)
            self.assertEqual(urlutils._win32_local_path_from_url, urlutils.local_path_from_url)
        else:
            self.assertEqual(urlutils._posix_local_path_to_url, urlutils.local_path_to_url)
            self.assertEqual(urlutils._posix_local_path_from_url, urlutils.local_path_from_url)

    def test_posix_local_path_to_url(self):
        to_url = urlutils._posix_local_path_to_url
        self.assertEqual('file:///path/to/foo',
            to_url('/path/to/foo'))
        self.assertEqual('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s',
            to_url(u'/path/to/r\xe4ksm\xf6rg\xe5s'))

    def test_posix_local_path_from_url(self):
        from_url = urlutils._posix_local_path_from_url
        self.assertEqual('/path/to/foo',
            from_url('file:///path/to/foo'))
        # Percent escapes are accepted in upper and lower case hex.
        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))

        # A bare path is not a file:// URL
        self.assertRaises(InvalidURL, from_url, '/path/to/foo')

    def test_win32_local_path_to_url(self):
        to_url = urlutils._win32_local_path_to_url
        self.assertEqual('file:///C|/path/to/foo',
            to_url('C:/path/to/foo'))
        # The lower-case drive letter is expected back in upper case.
        self.assertEqual('file:///D|/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s',
            to_url(u'd:/path/to/r\xe4ksm\xf6rg\xe5s'))

    def test_win32_local_path_from_url(self):
        from_url = urlutils._win32_local_path_from_url
        self.assertEqual('C:/path/to/foo',
            from_url('file:///C|/path/to/foo'))
        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///d|/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///d|/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))

        self.assertRaises(InvalidURL, from_url, '/path/to/foo')
        # Not a valid _win32 url, no drive letter
        self.assertRaises(InvalidURL, from_url, 'file:///path/to/foo')

    def test_split(self):
        # Test bzrlib.urlutils.split()
        split = urlutils.split
        if sys.platform == 'win32':
            self.assertRaises(InvalidURL, split, 'file:///path/to/foo')
            self.assertEqual(('file:///C|/', 'foo'), split('file:///C|/foo'))
            self.assertEqual(('file:///C|/', ''), split('file:///C|/'))
        else:
            self.assertEqual(('file:///', 'foo'), split('file:///foo'))
            self.assertEqual(('file:///', ''), split('file:///'))

        self.assertEqual(('http://host/path/to', 'foo'), split('http://host/path/to/foo'))
        self.assertEqual(('http://host/path/to', 'foo'), split('http://host/path/to/foo/'))
        self.assertEqual(('http://host/path/to/foo', ''),
            split('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual(('http://host/', 'path'), split('http://host/path'))
        self.assertEqual(('http://host/', ''), split('http://host/'))
        self.assertEqual(('http://host', ''), split('http://host'))
        self.assertEqual(('http:///nohost', 'path'), split('http:///nohost/path'))

        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
            split('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
            split('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual(('random+scheme://user:pass@ahost:port/', ''),
            split('random+scheme://user:pass@ahost:port/'))

        # Relative paths (no scheme)
        self.assertEqual(('path/to', 'foo'), split('path/to/foo'))
        self.assertEqual(('path/to', 'foo'), split('path/to/foo/'))
        self.assertEqual(('path/to/foo', ''),
            split('path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual(('path/..', 'foo'), split('path/../foo'))
        self.assertEqual(('../path', 'foo'), split('../path/foo'))

    def test_strip_trailing_slash(self):
        sts = urlutils.strip_trailing_slash
        if sys.platform == 'win32':
            self.assertEqual('file:///C|/', sts('file:///C|/'))
            self.assertEqual('file:///C|/foo', sts('file:///C|/foo'))
            self.assertEqual('file:///C|/foo', sts('file:///C|/foo/'))
        else:
            self.assertEqual('file:///', sts('file:///'))
            self.assertEqual('file:///foo', sts('file:///foo'))
            self.assertEqual('file:///foo', sts('file:///foo/'))

        self.assertEqual('http://host/', sts('http://host/'))
        self.assertEqual('http://host/foo', sts('http://host/foo'))
        self.assertEqual('http://host/foo', sts('http://host/foo/'))

        # No need to fail just because the slash is missing
        self.assertEqual('http://host', sts('http://host'))
        # TODO: jam 20060502 Should this raise InvalidURL?
        self.assertEqual('file://', sts('file://'))

        self.assertEqual('random+scheme://user:pass@ahost:port/path',
            sts('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('random+scheme://user:pass@ahost:port/path',
            sts('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            sts('random+scheme://user:pass@ahost:port/'))

        # Make sure relative paths work too
        self.assertEqual('path/to/foo', sts('path/to/foo'))
        self.assertEqual('path/to/foo', sts('path/to/foo/'))
        self.assertEqual('../to/foo', sts('../to/foo/'))
        self.assertEqual('path/../foo', sts('path/../foo/'))

    def test_unescape_for_display_utf8(self):
        # Test that URLs are converted to nice unicode strings for display
        def test(expected, url, encoding='utf-8'):
            """Check unescape_for_display() yields unicode equal to ``expected``.

            :param expected: The display string that should be produced.
            :param url: The escaped URL to convert.
            :param encoding: Output encoding passed to unescape_for_display().
            """
            disp_url = urlutils.unescape_for_display(url, encoding=encoding)
            self.assertIsInstance(disp_url, unicode)
            self.assertEqual(expected, disp_url)
        test('http://foo', 'http://foo')
        if sys.platform == 'win32':
            test('C:/foo/path', 'file:///C|foo/path')
        else:
            test('/foo/path', 'file:///foo/path')

        test('http://foo/%2Fbaz', 'http://foo/%2Fbaz')
        test(u'http://host/r\xe4ksm\xf6rg\xe5s',
             'http://host/r%C3%A4ksm%C3%B6rg%C3%A5s')

        # Make sure special escaped characters stay escaped
        test(u'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23',
             'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23')

        # Can we handle sections that don't have utf-8 encoding?
        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
             'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s')

        # Test encoding into output that can handle some characters
        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
             'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s',
             encoding='iso-8859-1')

        # This one can be encoded into utf8
        test(u'http://host/\u062c\u0648\u062c\u0648',
             'http://host/%d8%ac%d9%88%d8%ac%d9%88',
             encoding='utf-8')

        # This can't be put into 8859-1 and so stays as escapes
        test(u'http://host/%d8%ac%d9%88%d8%ac%d9%88',
             'http://host/%d8%ac%d9%88%d8%ac%d9%88',
             encoding='iso-8859-1')

    def test_escape(self):
        self.assertEqual('%25', urlutils.escape('%'))
        self.assertEqual('%C3%A5', urlutils.escape(u'\xe5'))

    def test_unescape(self):
        self.assertEqual('%', urlutils.unescape('%25'))
        self.assertEqual(u'\xe5', urlutils.unescape('%C3%A5'))

        # Raw non-ascii input and escapes that are not valid utf-8 are
        # expected to be rejected.
        self.assertRaises(InvalidURL, urlutils.unescape, u'\xe5')
        self.assertRaises(InvalidURL, urlutils.unescape, '\xe5')
        self.assertRaises(InvalidURL, urlutils.unescape, '%E5')

    def test_escape_unescape(self):
        # escape() followed by unescape() must round-trip
        self.assertEqual(u'\xe5', urlutils.unescape(urlutils.escape(u'\xe5')))
        self.assertEqual('%', urlutils.unescape(urlutils.escape('%')))