117
111
join('http://foo', 'bar') => 'http://foo/bar'
118
112
join('http://foo', 'bar', '../baz') => 'http://foo/baz'
122
match = _url_scheme_re.match(base)
114
m = _url_scheme_re.match(base)
125
scheme = match.group('scheme')
126
path = match.group('path').split('/')
127
if path[-1:] == ['']:
128
# Strip off a trailing slash
129
# This helps both when we are at the root, and when
130
# 'base' has an extra slash at the end
117
scheme = m.group('scheme')
118
path = m.group('path').split('/')
133
120
path = base.split('/')
135
if scheme is not None and len(path) >= 1:
137
# the path should be represented as an abs path.
138
# we know this must be absolute because of the presence of a URL scheme.
140
path = [''] + path[1:]
142
# create an empty host, but dont alter the path - this might be a
143
# relative url fragment.
148
match = _url_scheme_re.match(arg)
123
m = _url_scheme_re.match(arg)
151
scheme = match.group('scheme')
152
# this skips .. normalisation, making http://host/../../..
154
path = match.group('path').split('/')
155
# set the host and path according to new absolute URL, discarding
156
# any previous values.
157
# XXX: duplicates mess from earlier in this function. This URL
158
# manipulation code needs some cleaning up.
159
if scheme is not None and len(path) >= 1:
162
# url scheme implies absolute path.
165
# no url scheme we take the path as is.
126
scheme = m.group('scheme')
127
path = m.group('path').split('/')
168
path = '/'.join(path)
169
path = joinpath(path, arg)
170
path = path.split('/')
171
if remove_root and path[0:1] == ['']:
174
# Remove the leading slash from the path, so long as it isn't also the
175
# trailing slash, which we want to keep if present.
176
if path and path[0] == '' and len(path) > 1:
129
for chunk in arg.split('/'):
134
# Don't pop off the host portion
137
raise errors.InvalidURLJoin('Cannot go above root',
180
141
if scheme is None:
181
142
return '/'.join(path)
182
143
return scheme + '://' + '/'.join(path)
185
def joinpath(base, *args):
186
"""Join URL path segments to a URL path segment.
188
This is somewhat like osutils.joinpath, but intended for URLs.
190
XXX: this duplicates some normalisation logic, and also duplicates a lot of
191
path handling logic that already exists in some Transport implementations.
192
We really should try to have exactly one place in the code base responsible
193
for combining paths of URLs.
195
path = base.split('/')
196
if len(path) > 1 and path[-1] == '':
197
#If the path ends in a trailing /, remove it.
200
if arg.startswith('/'):
202
for chunk in arg.split('/'):
207
raise errors.InvalidURLJoin('Cannot go above root',
215
return '/'.join(path)
218
146
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
219
147
def _posix_local_path_from_url(url):
220
148
"""Convert a url like file:///path/to/foo into /path/to/foo"""
232
160
# importing directly from posixpath allows us to test this
233
161
# on non-posix platforms
234
return 'file://' + escape(_posix_normpath(
235
osutils._posix_abspath(path)))
162
from posixpath import normpath
163
return 'file://' + escape(normpath(bzrlib.osutils._posix_abspath(path)))
238
166
def _win32_local_path_from_url(url):
239
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
240
if not url.startswith('file://'):
241
raise errors.InvalidURL(url, 'local urls must start with file:///, '
242
'UNC path urls must start with file://')
167
"""Convert a url like file:///C|/path/to/foo into C:/path/to/foo"""
168
if not url.startswith('file:///'):
169
raise errors.InvalidURL(url, 'local urls must start with file:///')
243
170
# We strip off all 3 slashes
244
win32_url = url[len('file:'):]
245
# check for UNC path: //HOST/path
246
if not win32_url.startswith('///'):
247
if (win32_url[2] == '/'
248
or win32_url[3] in '|:'):
249
raise errors.InvalidURL(url, 'Win32 UNC path urls'
250
' have form file://HOST/path')
251
return unescape(win32_url)
252
# usual local path with drive letter
253
if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
254
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
255
or win32_url[4] not in '|:'
256
or win32_url[5] != '/'):
257
raise errors.InvalidURL(url, 'Win32 file urls start with'
258
' file:///x:/, where x is a valid drive letter')
259
return win32_url[3].upper() + u':' + unescape(win32_url[5:])
171
win32_url = url[len('file:///'):]
172
if (win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
173
or win32_url[1] not in '|:'
174
or win32_url[2] != '/'):
175
raise errors.InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
176
# TODO: jam 20060426, we could .upper() or .lower() the drive letter
177
# for better consistency.
178
return win32_url[0].upper() + u':' + unescape(win32_url[2:])
262
181
def _win32_local_path_to_url(path):
263
"""Convert a local path like ./foo into a URL like file:///C:/path/to/foo
182
"""Convert a local path like ./foo into a URL like file:///C|/path/to/foo
265
184
This also handles transforming escaping unicode characters, etc.
267
186
# importing directly from ntpath allows us to test this
268
# on non-win32 platform
269
# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
270
# which actually strips trailing space characters.
271
# The worst part is that under linux ntpath.abspath has different
272
# semantics, since 'nt' is not an available module.
273
win32_path = osutils._win32_abspath(path)
274
# check for UNC path \\HOST\path
275
if win32_path.startswith('//'):
276
return 'file:' + escape(win32_path)
187
# on non-win32 platforms
188
# TODO: jam 20060426 consider moving this import outside of the function
189
win32_path = bzrlib.osutils._nt_normpath(
190
bzrlib.osutils._win32_abspath(path)).replace('\\', '/')
277
191
return 'file:///' + win32_path[0].upper() + ':' + escape(win32_path[2:])
280
194
local_path_to_url = _posix_local_path_to_url
281
195
local_path_from_url = _posix_local_path_from_url
282
196
MIN_ABS_FILEURL_LENGTH = len('file:///')
283
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')
285
198
if sys.platform == 'win32':
286
199
local_path_to_url = _win32_local_path_to_url
287
200
local_path_from_url = _win32_local_path_from_url
289
MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
202
MIN_ABS_FILEURL_LENGTH = len('file:///C|/')
292
205
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
293
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
296
def _unescape_safe_chars(matchobj):
297
"""re.sub callback to convert hex-escapes to plain characters (if safe).
299
e.g. '%7E' will be converted to '~'.
301
hex_digits = matchobj.group(0)[1:]
302
char = chr(int(hex_digits, 16))
303
if char in _url_dont_escape_characters:
306
return matchobj.group(0).upper()
309
208
def normalize_url(url):
310
209
"""Make sure that a path string is in fully normalized URL form.
312
This handles URLs which have unicode characters, spaces,
211
This handles URLs which have unicode characters, spaces,
313
212
special characters, etc.
315
214
It has two basic modes of operation, depending on whether the
328
227
m = _url_scheme_re.match(url)
330
229
return local_path_to_url(url)
331
scheme = m.group('scheme')
332
path = m.group('path')
333
230
if not isinstance(url, unicode):
231
# TODO: jam 20060510 We need to test for ascii characters that
232
# shouldn't be allowed in URLs
335
234
if c not in _url_safe_characters:
336
235
raise errors.InvalidURL(url, 'URLs can only contain specific'
337
236
' safe characters (not %r)' % c)
338
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
339
return str(scheme + '://' + ''.join(path))
341
238
# We have a unicode (hybrid) url
342
path_chars = list(path)
239
scheme = m.group('scheme')
240
path = list(m.group('path'))
344
for i in xrange(len(path_chars)):
345
if path_chars[i] not in _url_safe_characters:
346
chars = path_chars[i].encode('utf-8')
347
path_chars[i] = ''.join(
348
['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
349
path = ''.join(path_chars)
350
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
351
return str(scheme + '://' + path)
242
for i in xrange(len(path)):
243
if path[i] not in _url_safe_characters:
244
chars = path[i].encode('utf-8')
245
path[i] = ''.join(['%%%02X' % ord(c) for c in path[i].encode('utf-8')])
246
return scheme + '://' + ''.join(path)
354
249
def relative_url(base, other):
477
357
file:///foo/ => file:///foo
478
358
# This is unique on win32 platforms, and is the only URL
479
359
# format which does it differently.
480
file:///c|/ => file:///c:/
360
file:///C|/ => file:///C|/
482
362
if not url.endswith('/'):
485
if sys.platform == 'win32' and url.startswith('file://'):
486
return _win32_strip_local_trailing_slash(url)
365
if sys.platform == 'win32' and url.startswith('file:///'):
366
# This gets handled specially, because the 'top-level'
367
# of a win32 path is actually the drive letter
368
if len(url) > MIN_ABS_FILEURL_LENGTH:
488
372
scheme_loc, first_path_slash = _find_scheme_and_separator(url)
489
373
if scheme_loc is None:
490
374
# This is a relative path, as it has no scheme
534
417
#These entries get mapped to themselves
535
418
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
537
# These characters shouldn't be percent-encoded, and it's always safe to
538
# unencode them if they are.
539
_url_dont_escape_characters = set(
540
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
541
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
542
"0123456789" # Numbers
543
"-._~" # Unreserved characters
546
420
# These characters should not be escaped
547
_url_safe_characters = set(
548
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
549
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
550
"0123456789" # Numbers
551
"_.-!~*'()" # Unreserved characters
552
"/;?:@&=+$," # Reserved characters
553
"%#" # Extra reserved characters
421
_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'
422
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
556
427
def unescape_for_display(url, encoding):
557
428
"""Decode what you can for a URL, so that we get a nice looking path.
608
478
# Otherwise take the url decoded one
610
480
return u'/'.join(res)
613
def derive_to_location(from_location):
614
"""Derive a TO_LOCATION given a FROM_LOCATION.
616
The normal case is a FROM_LOCATION of http://foo/bar => bar.
617
The Right Thing for some logical destinations may differ though
618
because no / may be present at all. In that case, the result is
619
the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar.
620
This latter case also applies when a Windows drive
621
is used without a path, e.g. c:foo-bar => foo-bar.
622
If no /, path separator or : is found, the from_location is returned.
624
if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
625
return os.path.basename(from_location.rstrip("/\\"))
627
sep = from_location.find(":")
629
return from_location[sep+1:]