# Copyright (C) 2005 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Tests for the urlutils wrapper."""

import os
import sys

import bzrlib
from bzrlib import osutils, urlutils
from bzrlib.errors import InvalidURL, InvalidURLJoin
from bzrlib.tests import TestCaseInTempDir, TestCase, TestSkipped


class TestUrlToPath(TestCase):
    """Tests for the URL <-> path helpers exposed by bzrlib.urlutils."""

    def test_basename(self):
        # Test bzrlib.urlutils.basename()
        basename = urlutils.basename
        if sys.platform == 'win32':
            # On win32 a file:// URL must carry a drive letter
            self.assertRaises(InvalidURL, basename, 'file:///path/to/foo')
            self.assertEqual('foo', basename('file:///C|/foo'))
            self.assertEqual('foo', basename('file:///C:/foo'))
            self.assertEqual('', basename('file:///C:/'))
        else:
            self.assertEqual('foo', basename('file:///foo'))
            self.assertEqual('', basename('file:///'))

        self.assertEqual('foo', basename('http://host/path/to/foo'))
        self.assertEqual('foo', basename('http://host/path/to/foo/'))
        self.assertEqual('',
            basename('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('path', basename('http://host/path'))
        self.assertEqual('', basename('http://host/'))
        self.assertEqual('', basename('http://host'))
        self.assertEqual('path', basename('http:///nohost/path'))

        self.assertEqual('path',
            basename('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('path',
            basename('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('',
            basename('random+scheme://user:pass@ahost:port/'))

        # relative paths
        self.assertEqual('foo', basename('path/to/foo'))
        self.assertEqual('foo', basename('path/to/foo/'))
        self.assertEqual('', basename('path/to/foo/',
            exclude_trailing_slash=False))
        self.assertEqual('foo', basename('path/../foo'))
        self.assertEqual('foo', basename('../path/foo'))

    def test_normalize_url_files(self):
        # Test that local paths are properly normalized
        normalize_url = urlutils.normalize_url

        def norm_file(expected, path):
            # Normalize a local path and check the tail of the resulting
            # file:/// URL, ignoring the platform specific prefix.
            url = normalize_url(path)
            self.assertStartsWith(url, 'file:///')
            if sys.platform == 'win32':
                url = url[len('file:///C:'):]
            else:
                url = url[len('file://'):]

            self.assertEndsWith(url, expected)

        norm_file('path/to/foo', 'path/to/foo')
        norm_file('/path/to/foo', '/path/to/foo')
        norm_file('path/to/foo', '../path/to/foo')

        # Local paths are assumed to *not* be escaped at all
        try:
            u'uni/\xb5'.encode(bzrlib.user_encoding)
        except UnicodeError:
            # locale cannot handle unicode
            pass
        else:
            norm_file('uni/%C2%B5', u'uni/\xb5')

        norm_file('uni/%25C2%25B5', u'uni/%C2%B5')
        norm_file('uni/%20b', u'uni/ b')
        # All the crazy characters get escaped in local paths => file:/// urls
        # The ' ' character must not be at the end, because on win32
        # it gets stripped off by ntpath.abspath
        norm_file('%27%20%3B/%3F%3A%40%26%3D%2B%24%2C%23', "' ;/?:@&=+$,#")

    def test_normalize_url_hybrid(self):
        # Anything with a scheme:// should be treated as a hybrid url
        # which changes what characters get escaped.
        normalize_url = urlutils.normalize_url

        eq = self.assertEqual
        eq('file:///foo/', normalize_url(u'file:///foo/'))
        eq('file:///foo/%20', normalize_url(u'file:///foo/ '))
        eq('file:///foo/%20', normalize_url(u'file:///foo/%20'))
        # Don't escape reserved characters
        eq('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$',
            normalize_url('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$'))
        eq('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$',
            normalize_url('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$'))

        # Escape unicode characters, but not already escaped chars
        eq('http://host/ab/%C2%B5/%C2%B5',
            normalize_url(u'http://host/ab/%C2%B5/\xb5'))

        # Normalize verifies URLs when they are not unicode
        # (indicating they did not come from the user)
        self.assertRaises(InvalidURL, normalize_url, 'http://host/\xb5')
        self.assertRaises(InvalidURL, normalize_url, 'http://host/ ')

    def test_url_scheme_re(self):
        # Test paths that may be URLs
        def test_one(url, scheme_and_path):
            """Assert that _url_scheme_re correctly matches

            :param url: The string to match against _url_scheme_re
            :param scheme_and_path: The (scheme, path) that should be matched
                can be None, to indicate it should not match
            """
            m = urlutils._url_scheme_re.match(url)
            if scheme_and_path is None:
                self.assertEqual(None, m)
            else:
                self.assertEqual(scheme_and_path[0], m.group('scheme'))
                self.assertEqual(scheme_and_path[1], m.group('path'))

        # Local paths
        test_one('/path', None)
        test_one('C:/path', None)
        test_one('../path/to/foo', None)
        test_one(u'../path/to/fo\xe5', None)

        # Real URLs
        test_one('http://host/path/', ('http', 'host/path/'))
        test_one('sftp://host/path/to/foo', ('sftp', 'host/path/to/foo'))
        test_one('file:///usr/bin', ('file', '/usr/bin'))
        test_one('file:///C:/Windows', ('file', '/C:/Windows'))
        test_one('file:///C|/Windows', ('file', '/C|/Windows'))
        test_one(u'readonly+sftp://host/path/\xe5',
            ('readonly+sftp', u'host/path/\xe5'))

        # Weird stuff
        # Can't have slashes or colons in the scheme
        test_one('/path/to/://foo', None)
        test_one('path:path://foo', None)
        # Must have more than one character for scheme
        test_one('C://foo', None)
        test_one('ab://foo', ('ab', 'foo'))

    def test_dirname(self):
        # Test bzrlib.urlutils.dirname()
        dirname = urlutils.dirname
        if sys.platform == 'win32':
            # On win32 a file:// URL must carry a drive letter
            self.assertRaises(InvalidURL, dirname, 'file:///path/to/foo')
            self.assertEqual('file:///C|/', dirname('file:///C|/foo'))
            self.assertEqual('file:///C|/', dirname('file:///C|/'))
        else:
            self.assertEqual('file:///', dirname('file:///foo'))
            self.assertEqual('file:///', dirname('file:///'))

        self.assertEqual('http://host/path/to',
            dirname('http://host/path/to/foo'))
        self.assertEqual('http://host/path/to',
            dirname('http://host/path/to/foo/'))
        self.assertEqual('http://host/path/to/foo',
            dirname('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('http://host/', dirname('http://host/path'))
        self.assertEqual('http://host/', dirname('http://host/'))
        self.assertEqual('http://host', dirname('http://host'))
        self.assertEqual('http:///nohost', dirname('http:///nohost/path'))

        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/'))

        # relative paths
        self.assertEqual('path/to', dirname('path/to/foo'))
        self.assertEqual('path/to', dirname('path/to/foo/'))
        self.assertEqual('path/to/foo',
            dirname('path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('path/..', dirname('path/../foo'))
        self.assertEqual('../path', dirname('../path/foo'))

    def test_join(self):
        # Test bzrlib.urlutils.join()
        def test(expected, *args):
            joined = urlutils.join(*args)
            self.assertEqual(expected, joined)

        # Test a single element
        test('foo', 'foo')

        # Test relative path joining
        test('foo/bar', 'foo', 'bar')
        test('http://foo/bar', 'http://foo', 'bar')
        test('http://foo/bar', 'http://foo', '.', 'bar')
        test('http://foo/baz', 'http://foo', 'bar', '../baz')
        test('http://foo/bar/baz', 'http://foo', 'bar/baz')
        test('http://foo/baz', 'http://foo', 'bar/../baz')

        # Absolute paths: a later absolute URL replaces what came before
        test('http://bar', 'http://foo', 'http://bar')
        test('sftp://bzr/foo', 'http://foo', 'bar', 'sftp://bzr/foo')
        test('file:///bar', 'foo', 'file:///bar')

        # From a base path
        test('file:///foo', 'file:///', 'foo')
        test('file:///bar/foo', 'file:///bar/', 'foo')
        test('http://host/foo', 'http://host/', 'foo')
        test('http://host/', 'http://host', '')

        # Invalid joinings
        # Cannot go above root
        self.assertRaises(InvalidURLJoin, urlutils.join,
            'http://foo', '../baz')

    def test_function_type(self):
        # The public local_path_* functions must be bound to the
        # implementation matching the running platform.
        if sys.platform == 'win32':
            self.assertEqual(urlutils._win32_local_path_to_url,
                urlutils.local_path_to_url)
            self.assertEqual(urlutils._win32_local_path_from_url,
                urlutils.local_path_from_url)
        else:
            self.assertEqual(urlutils._posix_local_path_to_url,
                urlutils.local_path_to_url)
            self.assertEqual(urlutils._posix_local_path_from_url,
                urlutils.local_path_from_url)

    def test_posix_local_path_to_url(self):
        # Test bzrlib.urlutils._posix_local_path_to_url()
        to_url = urlutils._posix_local_path_to_url
        self.assertEqual('file:///path/to/foo',
            to_url('/path/to/foo'))

        try:
            result = to_url(u'/path/to/r\xe4ksm\xf6rg\xe5s')
        except UnicodeError:
            raise TestSkipped("local encoding cannot handle unicode")

        self.assertEqual('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)

    def test_posix_local_path_from_url(self):
        # Test bzrlib.urlutils._posix_local_path_from_url()
        from_url = urlutils._posix_local_path_from_url
        self.assertEqual('/path/to/foo',
            from_url('file:///path/to/foo'))
        # Escapes are accepted in both upper and lower case hex
        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))

        self.assertRaises(InvalidURL, from_url, '/path/to/foo')

    def test_win32_local_path_to_url(self):
        # Test bzrlib.urlutils._win32_local_path_to_url()
        to_url = urlutils._win32_local_path_to_url
        self.assertEqual('file:///C:/path/to/foo',
            to_url('C:/path/to/foo'))
        # BOGUS: on win32, ntpath.abspath will strip trailing
        #       whitespace, so this will always fail
        #       Though under linux, it fakes abspath support
        #       and thus will succeed
        # self.assertEqual('file:///C:/path/to/foo%20',
        #     to_url('C:/path/to/foo '))
        self.assertEqual('file:///C:/path/to/f%20oo',
            to_url('C:/path/to/f oo'))

        try:
            result = to_url(u'd:/path/to/r\xe4ksm\xf6rg\xe5s')
        except UnicodeError:
            raise TestSkipped("local encoding cannot handle unicode")

        self.assertEqual('file:///D:/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)

    def test_win32_unc_path_to_url(self):
        # UNC paths (\\HOST\path or //HOST/path) map to file://HOST/path
        to_url = urlutils._win32_local_path_to_url
        self.assertEqual('file://HOST/path',
            to_url(r'\\HOST\path'))
        self.assertEqual('file://HOST/path',
            to_url('//HOST/path'))

        try:
            result = to_url(u'//HOST/path/to/r\xe4ksm\xf6rg\xe5s')
        except UnicodeError:
            raise TestSkipped("local encoding cannot handle unicode")

        self.assertEqual('file://HOST/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)

    def test_win32_local_path_from_url(self):
        # Test bzrlib.urlutils._win32_local_path_from_url()
        from_url = urlutils._win32_local_path_from_url
        self.assertEqual('C:/path/to/foo',
            from_url('file:///C|/path/to/foo'))
        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///d|/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///d:/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))

        self.assertRaises(InvalidURL, from_url, '/path/to/foo')
        # Not a valid _win32 url, no drive letter
        self.assertRaises(InvalidURL, from_url, 'file:///path/to/foo')

    def test_win32_unc_path_from_url(self):
        # file://HOST/path maps back to the UNC path //HOST/path
        from_url = urlutils._win32_local_path_from_url
        self.assertEqual('//HOST/path', from_url('file://HOST/path'))
        # despite IE allows 2, 4, 5 and 6 slashes in URL to another machine
        # we want to use only 2 slashes
        # Firefox understand only 5 slashes in URL, but it's ugly
        self.assertRaises(InvalidURL, from_url, 'file:////HOST/path')
        self.assertRaises(InvalidURL, from_url, 'file://///HOST/path')
        self.assertRaises(InvalidURL, from_url, 'file://////HOST/path')
        # check for file://C:/ instead of file:///C:/
        self.assertRaises(InvalidURL, from_url, 'file://C:/path')

    def test_win32_extract_drive_letter(self):
        # Test bzrlib.urlutils._win32_extract_drive_letter()
        extract = urlutils._win32_extract_drive_letter
        self.assertEqual(('file:///C:', '/foo'), extract('file://', '/C:/foo'))
        self.assertEqual(('file:///d|', '/path'), extract('file://', '/d|/path'))
        self.assertRaises(InvalidURL, extract, 'file://', '/path')

    def test_split(self):
        # Test bzrlib.urlutils.split()
        split = urlutils.split
        if sys.platform == 'win32':
            # On win32 a file:// URL must carry a drive letter
            self.assertRaises(InvalidURL, split, 'file:///path/to/foo')
            self.assertEqual(('file:///C|/', 'foo'), split('file:///C|/foo'))
            self.assertEqual(('file:///C:/', ''), split('file:///C:/'))
        else:
            self.assertEqual(('file:///', 'foo'), split('file:///foo'))
            self.assertEqual(('file:///', ''), split('file:///'))

        self.assertEqual(('http://host/path/to', 'foo'),
            split('http://host/path/to/foo'))
        self.assertEqual(('http://host/path/to', 'foo'),
            split('http://host/path/to/foo/'))
        self.assertEqual(('http://host/path/to/foo', ''),
            split('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual(('http://host/', 'path'), split('http://host/path'))
        self.assertEqual(('http://host/', ''), split('http://host/'))
        self.assertEqual(('http://host', ''), split('http://host'))
        self.assertEqual(('http:///nohost', 'path'),
            split('http:///nohost/path'))

        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
            split('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
            split('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual(('random+scheme://user:pass@ahost:port/', ''),
            split('random+scheme://user:pass@ahost:port/'))

        # relative paths
        self.assertEqual(('path/to', 'foo'), split('path/to/foo'))
        self.assertEqual(('path/to', 'foo'), split('path/to/foo/'))
        self.assertEqual(('path/to/foo', ''),
            split('path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual(('path/..', 'foo'), split('path/../foo'))
        self.assertEqual(('../path', 'foo'), split('../path/foo'))

    def test_win32_strip_local_trailing_slash(self):
        # Test bzrlib.urlutils._win32_strip_local_trailing_slash()
        strip = urlutils._win32_strip_local_trailing_slash
        self.assertEqual('file://', strip('file://'))
        self.assertEqual('file:///', strip('file:///'))
        self.assertEqual('file:///C', strip('file:///C'))
        self.assertEqual('file:///C:', strip('file:///C:'))
        self.assertEqual('file:///d|', strip('file:///d|'))
        # The slash after the drive letter is the root and must be kept
        self.assertEqual('file:///C:/', strip('file:///C:/'))
        self.assertEqual('file:///C:/a', strip('file:///C:/a/'))

    def test_strip_trailing_slash(self):
        # Test bzrlib.urlutils.strip_trailing_slash()
        sts = urlutils.strip_trailing_slash
        if sys.platform == 'win32':
            self.assertEqual('file:///C|/', sts('file:///C|/'))
            self.assertEqual('file:///C:/foo', sts('file:///C:/foo'))
            self.assertEqual('file:///C|/foo', sts('file:///C|/foo/'))
        else:
            self.assertEqual('file:///', sts('file:///'))
            self.assertEqual('file:///foo', sts('file:///foo'))
            self.assertEqual('file:///foo', sts('file:///foo/'))

        self.assertEqual('http://host/', sts('http://host/'))
        self.assertEqual('http://host/foo', sts('http://host/foo'))
        self.assertEqual('http://host/foo', sts('http://host/foo/'))

        # No need to fail just because the slash is missing
        self.assertEqual('http://host', sts('http://host'))
        # TODO: jam 20060502 Should this raise InvalidURL?
        self.assertEqual('file://', sts('file://'))

        self.assertEqual('random+scheme://user:pass@ahost:port/path',
            sts('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('random+scheme://user:pass@ahost:port/path',
            sts('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            sts('random+scheme://user:pass@ahost:port/'))

        # Make sure relative paths work too
        self.assertEqual('path/to/foo', sts('path/to/foo'))
        self.assertEqual('path/to/foo', sts('path/to/foo/'))
        self.assertEqual('../to/foo', sts('../to/foo/'))
        self.assertEqual('path/../foo', sts('path/../foo/'))

    def test_unescape_for_display_utf8(self):
        # Test that URLs are converted to nice unicode strings for display
        def test(expected, url, encoding='utf-8'):
            disp_url = urlutils.unescape_for_display(url, encoding=encoding)
            self.assertIsInstance(disp_url, unicode)
            self.assertEqual(expected, disp_url)

        test('http://foo', 'http://foo')
        if sys.platform == 'win32':
            test('C:/foo/path', 'file:///C|/foo/path')
            test('C:/foo/path', 'file:///C:/foo/path')
        else:
            test('/foo/path', 'file:///foo/path')

        test('http://foo/%2Fbaz', 'http://foo/%2Fbaz')
        test(u'http://host/r\xe4ksm\xf6rg\xe5s',
            'http://host/r%C3%A4ksm%C3%B6rg%C3%A5s')

        # Make sure special escaped characters stay escaped
        test(u'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23',
            'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23')

        # Can we handle sections that don't have utf-8 encoding?
        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
            'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s')

        # Test encoding into output that can handle some characters
        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
            'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s',
            encoding='iso-8859-1')

        # This one can be encoded into utf8
        test(u'http://host/\u062c\u0648\u062c\u0648',
            'http://host/%d8%ac%d9%88%d8%ac%d9%88',
            encoding='utf-8')

        # This can't be put into 8859-1 and so stays as escapes
        test(u'http://host/%d8%ac%d9%88%d8%ac%d9%88',
            'http://host/%d8%ac%d9%88%d8%ac%d9%88',
            encoding='iso-8859-1')

    def test_escape(self):
        # Test bzrlib.urlutils.escape()
        self.assertEqual('%25', urlutils.escape('%'))
        self.assertEqual('%C3%A5', urlutils.escape(u'\xe5'))

    def test_unescape(self):
        # Test bzrlib.urlutils.unescape()
        self.assertEqual('%', urlutils.unescape('%25'))
        self.assertEqual(u'\xe5', urlutils.unescape('%C3%A5'))

        # Unescaped non-ascii input and non-utf8 escapes are rejected
        self.assertRaises(InvalidURL, urlutils.unescape, u'\xe5')
        self.assertRaises(InvalidURL, urlutils.unescape, '\xe5')
        self.assertRaises(InvalidURL, urlutils.unescape, '%E5')

    def test_escape_unescape(self):
        # escape() followed by unescape() must round-trip
        self.assertEqual(u'\xe5', urlutils.unescape(urlutils.escape(u'\xe5')))
        self.assertEqual('%', urlutils.unescape(urlutils.escape('%')))

    def test_relative_url(self):
        # Test bzrlib.urlutils.relative_url()
        def test(expected, base, other):
            result = urlutils.relative_url(base, other)
            self.assertEqual(expected, result)

        test('a', 'http://host/', 'http://host/a')
        test('http://entirely/different', 'sftp://host/branch',
            'http://entirely/different')
        test('../person/feature', 'http://host/branch/mainline',
            'http://host/branch/person/feature')
        test('..', 'http://host/branch', 'http://host/')
        test('http://host2/branch', 'http://host1/branch', 'http://host2/branch')
        test('.', 'http://host1/branch', 'http://host1/branch')
        test('../../../branch/2b', 'file:///home/jelmer/foo/bar/2b',
            'file:///home/jelmer/branch/2b')
        test('../../branch/2b', 'sftp://host/home/jelmer/bar/2b',
            'sftp://host/home/jelmer/branch/2b')
        test('../../branch/feature/%2b', 'http://host/home/jelmer/bar/%2b',
            'http://host/home/jelmer/branch/feature/%2b')
        test('../../branch/feature/2b', 'http://host/home/jelmer/bar/2b/',
            'http://host/home/jelmer/branch/feature/2b')
        # relative_url should preserve a trailing slash
        test('../../branch/feature/2b/', 'http://host/home/jelmer/bar/2b/',
            'http://host/home/jelmer/branch/feature/2b/')
        test('../../branch/feature/2b/', 'http://host/home/jelmer/bar/2b',
            'http://host/home/jelmer/branch/feature/2b/')

        # TODO: treat http://host as http://host/
        #       relative_url is typically called from a branch.base or
        #       transport.base which always ends with a /
        #test('a', 'http://host', 'http://host/a')
        test('http://host/a', 'http://host', 'http://host/a')
        #test('.', 'http://host', 'http://host/')
        test('http://host/', 'http://host', 'http://host/')
        #test('.', 'http://host/', 'http://host')
        test('http://host', 'http://host/', 'http://host')


class TestCwdToURL(TestCaseInTempDir):
    """Test that local_path_to_url works based on the cwd"""

    def test_dot(self):
        # This test will fail if getcwd is not ascii
        # Work inside a known directory name so that the tail of the
        # URL generated for '.' is predictable.
        os.mkdir('mytest')
        os.chdir('mytest')

        url = urlutils.local_path_to_url('.')
        self.assertEndsWith(url, '/mytest')

    def test_non_ascii(self):
        # Only skip when the directory genuinely cannot be created;
        # otherwise the URL assertion below must run.
        try:
            os.mkdir(u'dod\xe9')
        except UnicodeError:
            raise TestSkipped('cannot create unicode directory')

        os.chdir(u'dod\xe9')

        # On Mac OSX this directory is actually:
        #   u'/dode\u0301' => '/dode\xcc\x81
        # but we should normalize it back to
        #   u'/dod\xe9' => '/dod\xc3\xa9'
        url = urlutils.local_path_to_url('.')
        self.assertEndsWith(url, '/dod%C3%A9')