117
117
join('http://foo', 'bar') => 'http://foo/bar'
118
118
join('http://foo', 'bar', '../baz') => 'http://foo/baz'
120
m = _url_scheme_re.match(base)
122
match = _url_scheme_re.match(base)
123
scheme = m.group('scheme')
124
path = m.group('path').split('/')
125
scheme = match.group('scheme')
126
path = match.group('path').split('/')
125
127
if path[-1:] == ['']:
126
128
# Strip off a trailing slash
127
129
# This helps both when we are at the root, and when
131
133
path = base.split('/')
135
if scheme is not None and len(path) >= 1:
137
# the path should be represented as an abs path.
138
# we know this must be absolute because of the presence of a URL scheme.
140
path = [''] + path[1:]
142
# create an empty host, but don't alter the path - this might be a
143
# relative url fragment.
134
m = _url_scheme_re.match(arg)
148
match = _url_scheme_re.match(arg)
137
scheme = m.group('scheme')
151
scheme = match.group('scheme')
138
152
# this skips .. normalisation, making http://host/../../..
139
153
# be rather strange.
140
path = m.group('path').split('/')
154
path = match.group('path').split('/')
155
# set the host and path according to new absolute URL, discarding
156
# any previous values.
157
# XXX: duplicates mess from earlier in this function. This URL
158
# manipulation code needs some cleaning up.
159
if scheme is not None and len(path) >= 1:
162
# url scheme implies absolute path.
165
# no url scheme we take the path as is.
142
for chunk in arg.split('/'):
147
# Don't pop off the host portion
150
raise errors.InvalidURLJoin('Cannot go above root',
168
path = '/'.join(path)
169
path = joinpath(path, arg)
170
path = path.split('/')
171
if remove_root and path[0:1] == ['']:
174
# Remove the leading slash from the path, so long as it isn't also the
175
# trailing slash, which we want to keep if present.
176
if path and path[0] == '' and len(path) > 1:
155
180
if scheme is None:
156
181
return '/'.join(path)
157
182
return scheme + '://' + '/'.join(path)
185
def joinpath(base, *args):
186
"""Join URL path segments to a URL path segment.
188
This is somewhat like osutils.joinpath, but intended for URLs.
190
XXX: this duplicates some normalisation logic, and also duplicates a lot of
191
path handling logic that already exists in some Transport implementations.
192
We really should try to have exactly one place in the code base responsible
193
for combining paths of URLs.
195
path = base.split('/')
196
if len(path) > 1 and path[-1] == '':
197
# If the path ends in a trailing /, remove it.
200
if arg.startswith('/'):
202
for chunk in arg.split('/'):
207
raise errors.InvalidURLJoin('Cannot go above root',
215
return '/'.join(path)
160
218
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
161
219
def _posix_local_path_from_url(url):
162
220
"""Convert a url like file:///path/to/foo into /path/to/foo"""
212
270
# which actually strips trailing space characters.
213
271
# The worst part is that under linux ntpath.abspath has different
214
272
# semantics, since 'nt' is not an available module.
215
win32_path = osutils._nt_normpath(
216
osutils._win32_abspath(path)).replace('\\', '/')
273
win32_path = osutils._win32_abspath(path)
217
274
# check for UNC path \\HOST\path
218
275
if win32_path.startswith('//'):
219
276
return 'file:' + escape(win32_path)