# Copyright (C) 2005 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Tests for the urlutils wrapper."""

import sys

import bzrlib
from bzrlib.errors import InvalidURL, InvalidURLJoin
import bzrlib.urlutils as urlutils
from bzrlib.tests import TestCaseInTempDir, TestCase, TestSkipped


class TestUrlToPath(TestCase):
    """Tests for the URL and local-path helpers in bzrlib.urlutils."""

    def test_basename(self):
        # Test bzrlib.urlutils.basename()
        basename = urlutils.basename
        if sys.platform == 'win32':
            self.assertRaises(InvalidURL, basename, 'file:///path/to/foo')
            self.assertEqual('foo', basename('file:///C|/foo'))
            self.assertEqual('foo', basename('file:///C:/foo'))
            self.assertEqual('', basename('file:///C:/'))
        else:
            self.assertEqual('foo', basename('file:///foo'))
            self.assertEqual('', basename('file:///'))

        self.assertEqual('foo', basename('http://host/path/to/foo'))
        self.assertEqual('foo', basename('http://host/path/to/foo/'))
        self.assertEqual('',
            basename('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('path', basename('http://host/path'))
        self.assertEqual('', basename('http://host/'))
        self.assertEqual('', basename('http://host'))
        self.assertEqual('path', basename('http:///nohost/path'))

        self.assertEqual('path', basename('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('path', basename('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('', basename('random+scheme://user:pass@ahost:port/'))

        # Relative paths
        self.assertEqual('foo', basename('path/to/foo'))
        self.assertEqual('foo', basename('path/to/foo/'))
        self.assertEqual('', basename('path/to/foo/',
            exclude_trailing_slash=False))
        self.assertEqual('foo', basename('path/../foo'))
        self.assertEqual('foo', basename('../path/foo'))

    def test_normalize_url_files(self):
        # Test that local paths are properly normalized
        normalize_url = urlutils.normalize_url

        def norm_file(expected, path):
            # Normalize a local path and check the tail of the resulting
            # file:/// URL, after dropping the scheme (and, on win32, the
            # drive letter) prefix.
            url = normalize_url(path)
            self.assertStartsWith(url, 'file:///')
            if sys.platform == 'win32':
                url = url[len('file:///C:'):]
            else:
                url = url[len('file://'):]

            self.assertEndsWith(url, expected)

        norm_file('path/to/foo', 'path/to/foo')
        norm_file('/path/to/foo', '/path/to/foo')
        norm_file('path/to/foo', '../path/to/foo')

        # Local paths are assumed to *not* be escaped at all
        try:
            u'uni/\xb5'.encode(bzrlib.user_encoding)
        except UnicodeError:
            # locale cannot handle unicode
            pass
        else:
            norm_file('uni/%C2%B5', u'uni/\xb5')

        norm_file('uni/%25C2%25B5', u'uni/%C2%B5')
        norm_file('uni/%20b', u'uni/ b')
        # All the crazy characters get escaped in local paths => file:/// urls
        # The ' ' character must not be at the end, because on win32
        # it gets stripped off by ntpath.abspath
        norm_file('%27%20%3B/%3F%3A%40%26%3D%2B%24%2C%23', "' ;/?:@&=+$,#")

    def test_normalize_url_hybrid(self):
        # Anything with a scheme:// should be treated as a hybrid url
        # which changes what characters get escaped.
        normalize_url = urlutils.normalize_url

        eq = self.assertEqual
        eq('file:///foo/', normalize_url(u'file:///foo/'))
        eq('file:///foo/%20', normalize_url(u'file:///foo/ '))
        eq('file:///foo/%20', normalize_url(u'file:///foo/%20'))
        # Don't escape reserved characters
        eq('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$',
            normalize_url('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$'))
        eq('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$',
            normalize_url('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$'))

        # Escape unicode characters, but not already escaped chars
        eq('http://host/ab/%C2%B5/%C2%B5',
            normalize_url(u'http://host/ab/%C2%B5/\xb5'))

        # Normalize verifies URLs when they are not unicode
        # (indicating they did not come from the user)
        self.assertRaises(InvalidURL, normalize_url, 'http://host/\xb5')
        self.assertRaises(InvalidURL, normalize_url, 'http://host/ ')

    def test_url_scheme_re(self):
        # Test paths that may be URLs
        def test_one(url, scheme_and_path):
            """Assert that _url_scheme_re correctly matches

            :param scheme_and_path: The (scheme, path) that should be matched
                can be None, to indicate it should not match
            """
            m = urlutils._url_scheme_re.match(url)
            if scheme_and_path is None:
                self.assertEqual(None, m)
            else:
                self.assertEqual(scheme_and_path[0], m.group('scheme'))
                self.assertEqual(scheme_and_path[1], m.group('path'))

        # Plain local paths must not match
        test_one('/path', None)
        test_one('C:/path', None)
        test_one('../path/to/foo', None)
        test_one(u'../path/to/fo\xe5', None)

        # Strings with a scheme must match
        test_one('http://host/path/', ('http', 'host/path/'))
        test_one('sftp://host/path/to/foo', ('sftp', 'host/path/to/foo'))
        test_one('file:///usr/bin', ('file', '/usr/bin'))
        test_one('file:///C:/Windows', ('file', '/C:/Windows'))
        test_one('file:///C|/Windows', ('file', '/C|/Windows'))
        test_one(u'readonly+sftp://host/path/\xe5', ('readonly+sftp', u'host/path/\xe5'))

        # Can't have slashes or colons in the scheme
        test_one('/path/to/://foo', None)
        test_one('path:path://foo', None)
        # Must have more than one character for scheme
        test_one('C://foo', None)
        test_one('ab://foo', ('ab', 'foo'))

    def test_dirname(self):
        # Test bzrlib.urlutils.dirname()
        dirname = urlutils.dirname
        if sys.platform == 'win32':
            self.assertRaises(InvalidURL, dirname, 'file:///path/to/foo')
            self.assertEqual('file:///C|/', dirname('file:///C|/foo'))
            self.assertEqual('file:///C|/', dirname('file:///C|/'))
        else:
            self.assertEqual('file:///', dirname('file:///foo'))
            self.assertEqual('file:///', dirname('file:///'))

        self.assertEqual('http://host/path/to', dirname('http://host/path/to/foo'))
        self.assertEqual('http://host/path/to', dirname('http://host/path/to/foo/'))
        self.assertEqual('http://host/path/to/foo',
            dirname('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('http://host/', dirname('http://host/path'))
        self.assertEqual('http://host/', dirname('http://host/'))
        self.assertEqual('http://host', dirname('http://host'))
        self.assertEqual('http:///nohost', dirname('http:///nohost/path'))

        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            dirname('random+scheme://user:pass@ahost:port/'))

        # Relative paths
        self.assertEqual('path/to', dirname('path/to/foo'))
        self.assertEqual('path/to', dirname('path/to/foo/'))
        self.assertEqual('path/to/foo',
            dirname('path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual('path/..', dirname('path/../foo'))
        self.assertEqual('../path', dirname('../path/foo'))

    def test_join(self):
        # Test bzrlib.urlutils.join()
        def test(expected, *args):
            joined = urlutils.join(*args)
            self.assertEqual(expected, joined)

        # Test a single element
        # NOTE(review): this assertion was inferred from the comment above;
        # confirm against the upstream file.
        test('foo', 'foo')

        # Test relative path joining
        test('foo/bar', 'foo', 'bar')
        test('http://foo/bar', 'http://foo', 'bar')
        test('http://foo/bar', 'http://foo', '.', 'bar')
        test('http://foo/baz', 'http://foo', 'bar', '../baz')
        test('http://foo/bar/baz', 'http://foo', 'bar/baz')
        test('http://foo/baz', 'http://foo', 'bar/../baz')

        # A later absolute URL replaces everything before it
        test('http://bar', 'http://foo', 'http://bar')
        test('sftp://bzr/foo', 'http://foo', 'bar', 'sftp://bzr/foo')
        test('file:///bar', 'foo', 'file:///bar')

        # Joining onto a base that ends at (or lacks) the root slash
        test('file:///foo', 'file:///', 'foo')
        test('file:///bar/foo', 'file:///bar/', 'foo')
        test('http://host/foo', 'http://host/', 'foo')
        test('http://host/', 'http://host', '')

        # Cannot go above root
        self.assertRaises(InvalidURLJoin, urlutils.join,
            'http://foo', '../baz')

    def test_function_type(self):
        # The public local_path_* names must be bound to the implementation
        # for the current platform.
        if sys.platform == 'win32':
            self.assertEqual(urlutils._win32_local_path_to_url, urlutils.local_path_to_url)
            self.assertEqual(urlutils._win32_local_path_from_url, urlutils.local_path_from_url)
        else:
            self.assertEqual(urlutils._posix_local_path_to_url, urlutils.local_path_to_url)
            self.assertEqual(urlutils._posix_local_path_from_url, urlutils.local_path_from_url)

    def test_posix_local_path_to_url(self):
        to_url = urlutils._posix_local_path_to_url
        self.assertEqual('file:///path/to/foo',
            to_url('/path/to/foo'))

        # Skip rather than fail when the local encoding cannot represent
        # the non-ascii path.
        try:
            result = to_url(u'/path/to/r\xe4ksm\xf6rg\xe5s')
        except UnicodeError:
            raise TestSkipped("local encoding cannot handle unicode")

        self.assertEqual('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)

    def test_posix_local_path_from_url(self):
        from_url = urlutils._posix_local_path_from_url
        self.assertEqual('/path/to/foo',
            from_url('file:///path/to/foo'))
        # Both upper- and lower-case hex escapes decode to the same path
        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))

        # A bare path is not a file:// URL
        self.assertRaises(InvalidURL, from_url, '/path/to/foo')

    def test_win32_local_path_to_url(self):
        to_url = urlutils._win32_local_path_to_url
        self.assertEqual('file:///C:/path/to/foo',
            to_url('C:/path/to/foo'))
        # BOGUS: on win32, ntpath.abspath will strip trailing
        #       whitespace, so this will always fail
        #       Though under linux, it fakes abspath support
        #       and thus will succeed
        # self.assertEqual('file:///C:/path/to/foo%20',
        #     to_url('C:/path/to/foo '))
        self.assertEqual('file:///C:/path/to/f%20oo',
            to_url('C:/path/to/f oo'))

        # Skip rather than fail when the local encoding cannot represent
        # the non-ascii path.
        try:
            result = to_url(u'd:/path/to/r\xe4ksm\xf6rg\xe5s')
        except UnicodeError:
            raise TestSkipped("local encoding cannot handle unicode")

        self.assertEqual('file:///D:/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)

    def test_win32_local_path_from_url(self):
        from_url = urlutils._win32_local_path_from_url
        self.assertEqual('C:/path/to/foo',
            from_url('file:///C|/path/to/foo'))
        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///d|/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
            from_url('file:///d:/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))

        self.assertRaises(InvalidURL, from_url, '/path/to/foo')
        # Not a valid _win32 url, no drive letter
        self.assertRaises(InvalidURL, from_url, 'file:///path/to/foo')

    def test__win32_extract_drive_letter(self):
        extract = urlutils._win32_extract_drive_letter
        self.assertEqual(('file:///C:', '/foo'), extract('file://', '/C:/foo'))
        self.assertEqual(('file:///d|', '/path'), extract('file://', '/d|/path'))
        self.assertRaises(InvalidURL, extract, 'file://', '/path')

    def test_split(self):
        # Test bzrlib.urlutils.split()
        split = urlutils.split
        if sys.platform == 'win32':
            self.assertRaises(InvalidURL, split, 'file:///path/to/foo')
            self.assertEqual(('file:///C|/', 'foo'), split('file:///C|/foo'))
            self.assertEqual(('file:///C:/', ''), split('file:///C:/'))
        else:
            self.assertEqual(('file:///', 'foo'), split('file:///foo'))
            self.assertEqual(('file:///', ''), split('file:///'))

        self.assertEqual(('http://host/path/to', 'foo'), split('http://host/path/to/foo'))
        self.assertEqual(('http://host/path/to', 'foo'), split('http://host/path/to/foo/'))
        self.assertEqual(('http://host/path/to/foo', ''),
            split('http://host/path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual(('http://host/', 'path'), split('http://host/path'))
        self.assertEqual(('http://host/', ''), split('http://host/'))
        self.assertEqual(('http://host', ''), split('http://host'))
        self.assertEqual(('http:///nohost', 'path'), split('http:///nohost/path'))

        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
            split('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
            split('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual(('random+scheme://user:pass@ahost:port/', ''),
            split('random+scheme://user:pass@ahost:port/'))

        # Relative paths
        self.assertEqual(('path/to', 'foo'), split('path/to/foo'))
        self.assertEqual(('path/to', 'foo'), split('path/to/foo/'))
        self.assertEqual(('path/to/foo', ''),
            split('path/to/foo/', exclude_trailing_slash=False))
        self.assertEqual(('path/..', 'foo'), split('path/../foo'))
        self.assertEqual(('../path', 'foo'), split('../path/foo'))

    def test__win32_strip_local_trailing_slash(self):
        strip = urlutils._win32_strip_local_trailing_slash
        self.assertEqual('file://', strip('file://'))
        self.assertEqual('file:///', strip('file:///'))
        self.assertEqual('file:///C', strip('file:///C'))
        self.assertEqual('file:///C:', strip('file:///C:'))
        self.assertEqual('file:///d|', strip('file:///d|'))
        self.assertEqual('file:///C:/', strip('file:///C:/'))
        self.assertEqual('file:///C:/a', strip('file:///C:/a/'))

    def test_strip_trailing_slash(self):
        sts = urlutils.strip_trailing_slash
        if sys.platform == 'win32':
            self.assertEqual('file:///C|/', sts('file:///C|/'))
            self.assertEqual('file:///C:/foo', sts('file:///C:/foo'))
            self.assertEqual('file:///C|/foo', sts('file:///C|/foo/'))
        else:
            self.assertEqual('file:///', sts('file:///'))
            self.assertEqual('file:///foo', sts('file:///foo'))
            self.assertEqual('file:///foo', sts('file:///foo/'))

        self.assertEqual('http://host/', sts('http://host/'))
        self.assertEqual('http://host/foo', sts('http://host/foo'))
        self.assertEqual('http://host/foo', sts('http://host/foo/'))

        # No need to fail just because the slash is missing
        self.assertEqual('http://host', sts('http://host'))
        # TODO: jam 20060502 Should this raise InvalidURL?
        self.assertEqual('file://', sts('file://'))

        self.assertEqual('random+scheme://user:pass@ahost:port/path',
            sts('random+scheme://user:pass@ahost:port/path'))
        self.assertEqual('random+scheme://user:pass@ahost:port/path',
            sts('random+scheme://user:pass@ahost:port/path/'))
        self.assertEqual('random+scheme://user:pass@ahost:port/',
            sts('random+scheme://user:pass@ahost:port/'))

        # Make sure relative paths work too
        self.assertEqual('path/to/foo', sts('path/to/foo'))
        self.assertEqual('path/to/foo', sts('path/to/foo/'))
        self.assertEqual('../to/foo', sts('../to/foo/'))
        self.assertEqual('path/../foo', sts('path/../foo/'))

    def test_unescape_for_display_utf8(self):
        # Test that URLs are converted to nice unicode strings for display
        def test(expected, url, encoding='utf-8'):
            disp_url = urlutils.unescape_for_display(url, encoding=encoding)
            self.assertIsInstance(disp_url, unicode)
            self.assertEqual(expected, disp_url)

        test('http://foo', 'http://foo')
        if sys.platform == 'win32':
            test('c:/foo/path', 'file:///C|/foo/path')
            test('c:/foo/path', 'file:///C:/foo/path')
        else:
            test('/foo/path', 'file:///foo/path')

        test('http://foo/%2Fbaz', 'http://foo/%2Fbaz')
        test(u'http://host/r\xe4ksm\xf6rg\xe5s',
             'http://host/r%C3%A4ksm%C3%B6rg%C3%A5s')

        # Make sure special escaped characters stay escaped
        test(u'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23',
             'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23')

        # Can we handle sections that don't have utf-8 encoding?
        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
             'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s')

        # Test encoding into output that can handle some characters
        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
             'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s',
             encoding='iso-8859-1')

        # This one can be encoded into utf8
        test(u'http://host/\u062c\u0648\u062c\u0648',
             'http://host/%d8%ac%d9%88%d8%ac%d9%88',
             encoding='utf-8')

        # This can't be put into 8859-1 and so stays as escapes
        test(u'http://host/%d8%ac%d9%88%d8%ac%d9%88',
             'http://host/%d8%ac%d9%88%d8%ac%d9%88',
             encoding='iso-8859-1')

    def test_escape(self):
        self.assertEqual('%25', urlutils.escape('%'))
        self.assertEqual('%C3%A5', urlutils.escape(u'\xe5'))

    def test_unescape(self):
        self.assertEqual('%', urlutils.unescape('%25'))
        self.assertEqual(u'\xe5', urlutils.unescape('%C3%A5'))

        # Input that is non-ascii, or that does not decode as utf-8, is rejected
        self.assertRaises(InvalidURL, urlutils.unescape, u'\xe5')
        self.assertRaises(InvalidURL, urlutils.unescape, '\xe5')
        self.assertRaises(InvalidURL, urlutils.unescape, '%E5')

    def test_escape_unescape(self):
        # escape() followed by unescape() must round-trip
        self.assertEqual(u'\xe5', urlutils.unescape(urlutils.escape(u'\xe5')))
        self.assertEqual('%', urlutils.unescape(urlutils.escape('%')))

    def test_relative_url(self):
        def test(expected, base, other):
            result = urlutils.relative_url(base, other)
            self.assertEqual(expected, result)

        test('a', 'http://host/', 'http://host/a')
        test('http://entirely/different', 'sftp://host/branch',
             'http://entirely/different')
        test('../person/feature', 'http://host/branch/mainline',
             'http://host/branch/person/feature')
        test('..', 'http://host/branch', 'http://host/')
        test('http://host2/branch', 'http://host1/branch', 'http://host2/branch')
        test('.', 'http://host1/branch', 'http://host1/branch')
        test('../../../branch/2b', 'file:///home/jelmer/foo/bar/2b',
             'file:///home/jelmer/branch/2b')
        test('../../branch/2b', 'sftp://host/home/jelmer/bar/2b',
             'sftp://host/home/jelmer/branch/2b')
        test('../../branch/feature/%2b', 'http://host/home/jelmer/bar/%2b',
             'http://host/home/jelmer/branch/feature/%2b')
        test('../../branch/feature/2b', 'http://host/home/jelmer/bar/2b/',
             'http://host/home/jelmer/branch/feature/2b')
        # relative_url should preserve a trailing slash
        test('../../branch/feature/2b/', 'http://host/home/jelmer/bar/2b/',
             'http://host/home/jelmer/branch/feature/2b/')
        test('../../branch/feature/2b/', 'http://host/home/jelmer/bar/2b',
             'http://host/home/jelmer/branch/feature/2b/')

        # TODO: treat http://host as http://host/
        #       relative_url is typically called from a branch.base or
        #       transport.base which always ends with a /
        #test('a', 'http://host', 'http://host/a')
        test('http://host/a', 'http://host', 'http://host/a')
        #test('.', 'http://host', 'http://host/')
        test('http://host/', 'http://host', 'http://host/')
        #test('.', 'http://host/', 'http://host')
        test('http://host', 'http://host/', 'http://host')