67
68
relpath = relpath.encode('utf-8')
68
69
# After quoting and encoding, the path should be perfectly
69
70
# safe as a plain ASCII string, str() just enforces this
70
return str(urllib.quote(relpath))
71
return str(urllib.quote(relpath, safe='/~'))
73
74
def file_relpath(base, path):
74
75
"""Compute just the relative sub-portion of a url
76
77
This assumes that both paths are already fully specified file:// URLs.
78
assert len(base) >= MIN_ABS_FILEURL_LENGTH, ('Length of base must be equal or'
79
' exceed the platform minimum url length (which is %d)' %
80
MIN_ABS_FILEURL_LENGTH)
79
if len(base) < MIN_ABS_FILEURL_LENGTH:
80
raise ValueError('Length of base must be equal or'
81
' exceed the platform minimum url length (which is %d)' %
82
MIN_ABS_FILEURL_LENGTH)
82
83
base = local_path_from_url(base)
83
84
path = local_path_from_url(path)
84
85
return escape(osutils.relpath(base, path))
117
118
join('http://foo', 'bar') => 'http://foo/bar'
118
119
join('http://foo', 'bar', '../baz') => 'http://foo/baz'
120
m = _url_scheme_re.match(base)
123
match = _url_scheme_re.match(base)
123
scheme = m.group('scheme')
124
path = m.group('path').split('/')
126
scheme = match.group('scheme')
127
path = match.group('path').split('/')
125
128
if path[-1:] == ['']:
126
129
# Strip off a trailing slash
127
130
# This helps both when we are at the root, and when
131
134
path = base.split('/')
136
if scheme is not None and len(path) >= 1:
138
# the path should be represented as an abs path.
139
# we know this must be absolute because of the presence of a URL scheme.
141
path = [''] + path[1:]
143
# create an empty host, but don't alter the path - this might be a
144
# relative url fragment.
134
m = _url_scheme_re.match(arg)
149
match = _url_scheme_re.match(arg)
137
scheme = m.group('scheme')
138
path = m.group('path').split('/')
152
scheme = match.group('scheme')
153
# this skips .. normalisation, making http://host/../../..
155
path = match.group('path').split('/')
156
# set the host and path according to new absolute URL, discarding
157
# any previous values.
158
# XXX: duplicates mess from earlier in this function. This URL
159
# manipulation code needs some cleaning up.
160
if scheme is not None and len(path) >= 1:
163
# url scheme implies absolute path.
166
# no url scheme we take the path as is.
140
for chunk in arg.split('/'):
145
# Don't pop off the host portion
148
raise errors.InvalidURLJoin('Cannot go above root',
169
path = '/'.join(path)
170
path = joinpath(path, arg)
171
path = path.split('/')
172
if remove_root and path[0:1] == ['']:
175
# Remove the leading slash from the path, so long as it isn't also the
176
# trailing slash, which we want to keep if present.
177
if path and path[0] == '' and len(path) > 1:
153
181
if scheme is None:
154
182
return '/'.join(path)
155
183
return scheme + '://' + '/'.join(path)
186
def joinpath(base, *args):
187
"""Join URL path segments to a URL path segment.
189
This is somewhat like osutils.joinpath, but intended for URLs.
191
XXX: this duplicates some normalisation logic, and also duplicates a lot of
192
path handling logic that already exists in some Transport implementations.
193
We really should try to have exactly one place in the code base responsible
194
for combining paths of URLs.
196
path = base.split('/')
197
if len(path) > 1 and path[-1] == '':
198
# If the path ends in a trailing /, remove it.
201
if arg.startswith('/'):
203
for chunk in arg.split('/'):
208
raise errors.InvalidURLJoin('Cannot go above root',
216
return '/'.join(path)
158
219
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
159
220
def _posix_local_path_from_url(url):
160
221
"""Convert a url like file:///path/to/foo into /path/to/foo"""
178
239
def _win32_local_path_from_url(url):
179
240
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
180
if not url.startswith('file:///'):
181
raise errors.InvalidURL(url, 'local urls must start with file:///')
241
if not url.startswith('file://'):
242
raise errors.InvalidURL(url, 'local urls must start with file:///, '
243
'UNC path urls must start with file://')
182
244
# We strip off all 3 slashes
183
win32_url = url[len('file:///'):]
184
if (win32_url[0] not in ('abcdefghijklmnopqrstuvwxyz'
245
win32_url = url[len('file:'):]
246
# check for UNC path: //HOST/path
247
if not win32_url.startswith('///'):
248
if (win32_url[2] == '/'
249
or win32_url[3] in '|:'):
250
raise errors.InvalidURL(url, 'Win32 UNC path urls'
251
' have form file://HOST/path')
252
return unescape(win32_url)
254
# allow empty paths so we can serve all roots
255
if win32_url == '///':
258
# usual local path with drive letter
259
if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
185
260
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
186
or win32_url[1] not in '|:'
187
or win32_url[2] != '/'):
261
or win32_url[4] not in '|:'
262
or win32_url[5] != '/'):
188
263
raise errors.InvalidURL(url, 'Win32 file urls start with'
189
264
' file:///x:/, where x is a valid drive letter')
190
return win32_url[0].upper() + u':' + unescape(win32_url[2:])
265
return win32_url[3].upper() + u':' + unescape(win32_url[5:])
193
268
def _win32_local_path_to_url(path):
196
271
This also handles transforming escaping unicode characters, etc.
198
# importing directly from ntpath allows us to test this
273
# importing directly from ntpath allows us to test this
199
274
# on non-win32 platform
200
275
# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
201
276
# which actually strips trailing space characters.
202
277
# The worst part is that under linux ntpath.abspath has different
203
278
# semantics, since 'nt' is not an available module.
204
win32_path = osutils._nt_normpath(
205
osutils._win32_abspath(path)).replace('\\', '/')
206
return 'file:///' + win32_path[0].upper() + ':' + escape(win32_path[2:])
282
win32_path = osutils._win32_abspath(path)
283
# check for UNC path \\HOST\path
284
if win32_path.startswith('//'):
285
return 'file:' + escape(win32_path)
286
return ('file:///' + str(win32_path[0].upper()) + ':' +
287
escape(win32_path[2:]))
209
290
local_path_to_url = _posix_local_path_to_url
221
302
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
303
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
306
def _unescape_safe_chars(matchobj):
307
"""re.sub callback to convert hex-escapes to plain characters (if safe).
309
e.g. '%7E' will be converted to '~'.
311
hex_digits = matchobj.group(0)[1:]
312
char = chr(int(hex_digits, 16))
313
if char in _url_dont_escape_characters:
316
return matchobj.group(0).upper()
224
319
def normalize_url(url):
225
320
"""Make sure that a path string is in fully normalized URL form.
227
This handles URLs which have unicode characters, spaces,
322
This handles URLs which have unicode characters, spaces,
228
323
special characters, etc.
230
325
It has two basic modes of operation, depending on whether the
243
338
m = _url_scheme_re.match(url)
245
340
return local_path_to_url(url)
341
scheme = m.group('scheme')
342
path = m.group('path')
246
343
if not isinstance(url, unicode):
248
345
if c not in _url_safe_characters:
249
346
raise errors.InvalidURL(url, 'URLs can only contain specific'
250
347
' safe characters (not %r)' % c)
348
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
349
return str(scheme + '://' + ''.join(path))
252
351
# We have a unicode (hybrid) url
253
scheme = m.group('scheme')
254
path = list(m.group('path'))
352
path_chars = list(path)
256
for i in xrange(len(path)):
257
if path[i] not in _url_safe_characters:
258
chars = path[i].encode('utf-8')
259
path[i] = ''.join(['%%%02X' % ord(c) for c in path[i].encode('utf-8')])
260
return scheme + '://' + ''.join(path)
354
for i in xrange(len(path_chars)):
355
if path_chars[i] not in _url_safe_characters:
356
chars = path_chars[i].encode('utf-8')
357
path_chars[i] = ''.join(
358
['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
359
path = ''.join(path_chars)
360
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
361
return str(scheme + '://' + path)
263
364
def relative_url(base, other):
279
380
other_scheme = other[:other_first_slash]
280
381
if base_scheme != other_scheme:
383
elif sys.platform == 'win32' and base_scheme == 'file://':
384
base_drive = base[base_first_slash+1:base_first_slash+3]
385
other_drive = other[other_first_slash+1:other_first_slash+3]
386
if base_drive != other_drive:
283
389
base_path = base[base_first_slash+1:]
284
390
other_path = other[other_first_slash+1:]
312
418
# Strip off the drive letter
313
419
# path is currently /C:/foo
314
420
if len(path) < 3 or path[2] not in ':|' or path[3] != '/':
315
raise errors.InvalidURL(url_base + path,
421
raise errors.InvalidURL(url_base + path,
316
422
'win32 file:/// paths need a drive letter')
317
423
url_base += path[0:3] # file:// + /C:
318
424
path = path[3:] # /foo
326
432
:param exclude_trailing_slash: Strip off a final '/' if it is part
327
433
of the path (but not if it is part of the protocol specification)
329
:return: (parent_url, child_dir). child_dir may be the empty string if we're at
435
:return: (parent_url, child_dir). child_dir may be the empty string if we're at
332
438
scheme_loc, first_path_slash = _find_scheme_and_separator(url)
436
542
# These are characters that if escaped, should stay that way
437
543
_no_decode_chars = ';/?:@&=+$,#'
438
544
_no_decode_ords = [ord(c) for c in _no_decode_chars]
439
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
545
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
440
546
+ ['%02X' % o for o in _no_decode_ords])
441
547
_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
442
548
+ [('%02X' % o, chr(o)) for o in range(256)]))
443
549
# These entries stay escaped: each hex code maps back to its own '%XX' form
444
550
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
552
# These characters shouldn't be percent-encoded, and it's always safe to
553
# unencode them if they are.
554
_url_dont_escape_characters = set(
555
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
556
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
557
"0123456789" # Numbers
558
"-._~" # Unreserved characters
446
561
# These characters should not be escaped
447
_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'
448
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
562
_url_safe_characters = set(
563
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
564
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
565
"0123456789" # Numbers
566
"_.-!~*'()" # Unreserved characters
567
"/;?:@&=+$," # Reserved characters
568
"%#" # Extra reserved characters
453
571
def unescape_for_display(url, encoding):
454
572
"""Decode what you can for a URL, so that we get a nice looking path.
456
574
This will turn file:// urls into local paths, and try to decode
457
575
any portions of a http:// style url that it can.
459
Any sections of the URL which can't be represented in the encoding or
577
Any sections of the URL which can't be represented in the encoding or
460
578
need to stay as escapes are left alone.
462
580
:param url: A 7-bit ASCII URL
463
581
:param encoding: The final output encoding
465
:return: A unicode string which can be safely encoded into the
583
:return: A unicode string which can be safely encoded into the
466
584
specified encoding.
468
assert encoding is not None, 'you cannot specify None for the display encoding.'
587
raise ValueError('you cannot specify None for the display encoding')
469
588
if url.startswith('file://'):
471
590
path = local_path_from_url(url)
505
624
# Otherwise take the url decoded one
507
626
return u'/'.join(res)
629
def derive_to_location(from_location):
630
"""Derive a TO_LOCATION given a FROM_LOCATION.
632
The normal case is a FROM_LOCATION of http://foo/bar => bar.
633
The Right Thing for some logical destinations may differ though
634
because no / may be present at all. In that case, the result is
635
the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar.
636
This latter case also applies when a Windows drive
637
is used without a path, e.g. c:foo-bar => foo-bar.
638
If no /, path separator or : is found, the from_location is returned.
640
if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
641
return os.path.basename(from_location.rstrip("/\\"))
643
sep = from_location.find(":")
645
return from_location[sep+1:]
650
def _is_absolute(url):
    """Report whether joining ``url`` onto '/foo' leaves it unchanged.

    :param url: The path string to examine.
    :return: True when ``osutils.pathjoin('/foo', url)`` compares equal to
        ``url`` itself, False otherwise.
    """
    # NOTE(review): presumably pathjoin discards the '/foo' base when the
    # second argument is already absolute -- confirm against osutils.
    joined = osutils.pathjoin('/foo', url)
    return joined == url
654
def rebase_url(url, old_base, new_base):
655
"""Convert a relative path from an old base URL to a new base URL.
657
The result will be a relative path.
658
Absolute paths and full URLs are returned unaltered.
660
scheme, separator = _find_scheme_and_separator(url)
661
if scheme is not None:
663
if _is_absolute(url):
665
old_parsed = urlparse.urlparse(old_base)
666
new_parsed = urlparse.urlparse(new_base)
667
if (old_parsed[:2]) != (new_parsed[:2]):
668
raise errors.InvalidRebaseURLs(old_base, new_base)
669
return determine_relative_path(new_parsed[2],
670
join(old_parsed[2], url))
673
def determine_relative_path(from_path, to_path):
674
"""Determine a relative path from from_path to to_path."""
675
from_segments = osutils.splitpath(from_path)
676
to_segments = osutils.splitpath(to_path)
678
for count, (from_element, to_element) in enumerate(zip(from_segments,
680
if from_element != to_element:
684
unique_from = from_segments[count:]
685
unique_to = to_segments[count:]
686
segments = (['..'] * len(unique_from) + unique_to)
687
if len(segments) == 0:
689
return osutils.pathjoin(*segments)