3873.3.1
by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test |
1 |
# Copyright (C) 2006, 2008 Canonical Ltd
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
16 |
|
17 |
"""A collection of functions for handling URL operations."""
|
|
18 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
19 |
import os |
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
20 |
import re |
21 |
import sys |
|
1996.3.12
by John Arbash Meinel
Change how 'revision' is imported to avoid problems later |
22 |
|
23 |
from bzrlib.lazy_import import lazy_import |
|
24 |
lazy_import(globals(), """ |
|
25 |
from posixpath import split as _posix_split, normpath as _posix_normpath
|
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
26 |
import urllib
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
27 |
import urlparse
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
28 |
|
1996.3.12
by John Arbash Meinel
Change how 'revision' is imported to avoid problems later |
29 |
from bzrlib import (
|
30 |
errors,
|
|
31 |
osutils,
|
|
32 |
)
|
|
33 |
""") |
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
34 |
|
35 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
36 |
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect.
    :param exclude_trailing_slash: When true, a URL that looks like
        "path/to/foo/" yields 'foo' rather than ''.
    :return: Only the last component of the URL. This may be '' when the
        trailing slash is not excluded, or when the URL is at its root.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
|
47 |
||
48 |
||
49 |
def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory portion of a URL.

    :param url: A relative or absolute URL.
    :param exclude_trailing_slash: Drop a final slash before splitting
        (http://host/foo/ is treated as http://host/foo, while
        http://host/ stays http://host/).
    :return: The URL with its last path chunk removed.
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return parent
|
61 |
||
62 |
||
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
63 |
def escape(relpath):
    """Quote relpath so that it can be used as part of a URL.

    Unicode input is encoded to UTF-8 before quoting. The characters
    '/' and '~' are left unquoted.

    :param relpath: The path to quote.
    :return: The quoted path as a plain str.
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    quoted = urllib.quote(relpath, safe='/~')
    # Once encoded and quoted the text should be plain ASCII; the str()
    # call simply enforces that.
    return str(quoted)
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
70 |
|
71 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
72 |
def file_relpath(base, path):
    """Compute the relative sub-portion of one file url below another.

    Both arguments are assumed to be fully specified file:// URLs already.

    :param base: The file:// URL that path is made relative to.
    :param path: The file:// URL to express relative to base.
    :return: The escaped relative path.
    :raises ValueError: If base is shorter than MIN_ABS_FILEURL_LENGTH.
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base must be equal or'
                   ' exceed the platform minimum url length (which is %d)'
                   % MIN_ABS_FILEURL_LENGTH)
        raise ValueError(message)
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return escape(osutils.relpath(local_base, local_path))
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
84 |
|
85 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
86 |
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    This is a helper for the other path utilities in this module.
    It could probably be replaced by urlparse.

    :param url: The URL (or relative path) to examine.
    :return: A tuple (scheme_loc, first_path_slash). Both are None when url
        does not match _url_scheme_re. Otherwise scheme_loc is the length
        of the scheme, and first_path_slash is the index in url of the
        first '/' after the '://', or None if there is no such slash.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))
    # Find the path separating slash (the first slash after the ://)
    slash_in_path = match.group('path').find('/')
    if slash_in_path == -1:
        return scheme_length, None
    # 'path' starts right after 'scheme://', so shift the index by the
    # scheme length plus the three characters of '://'.
    return scheme_length, slash_in_path + scheme_length + 3
|
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
105 |
|
106 |
||
1685.1.55
by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs |
107 |
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or relative path.
    :param args: Further sections to join onto base, applied in order.
        Sections that match _url_scheme_re replace everything accumulated
        so far; all other sections are combined using joinpath().
    :return: base itself when no args are given, otherwise the joined URL
        (with a 'scheme://' prefix only if a scheme was found).
    """
    if not args:
        return base
    match = _url_scheme_re.match(base)
    scheme = None
    if match:
        scheme = match.group('scheme')
        path = match.group('path').split('/')
        if path[-1:] == ['']:
            # Strip off a trailing slash
            # This helps both when we are at the root, and when
            # 'base' has an extra slash at the end
            path = path[:-1]
    else:
        path = base.split('/')

    if scheme is not None and len(path) >= 1:
        # The first chunk after 'scheme://' is kept aside as the host.
        host = path[:1]
        # the path should be represented as an abs path.
        # we know this must be absolute because of the presence of a URL scheme.
        remove_root = True
        path = [''] + path[1:]
    else:
        # create an empty host, but dont alter the path - this might be a
        # relative url fragment.
        host = []
        remove_root = False

    for arg in args:
        match = _url_scheme_re.match(arg)
        if match:
            # Absolute URL
            scheme = match.group('scheme')
            # this skips .. normalisation, making http://host/../../..
            # be rather strange.
            path = match.group('path').split('/')
            # set the host and path according to new absolute URL, discarding
            # any previous values.
            # XXX: duplicates mess from earlier in this function.  This URL
            # manipulation code needs some cleaning up.
            if scheme is not None and len(path) >= 1:
                host = path[:1]
                path = path[1:]
                # url scheme implies absolute path.
                path = [''] + path
            else:
                # no url scheme we take the path as is.
                host = []
        else:
            # Relative section: joinpath() works on strings, so convert the
            # chunk list to a string and back again around the call.
            path = '/'.join(path)
            path = joinpath(path, arg)
            path = path.split('/')
    # NOTE: remove_root is decided from 'base' only; it is not updated when
    # an absolute URL is encountered in args.
    if remove_root and path[0:1] == ['']:
        del path[0]
    if host:
        # Remove the leading slash from the path, so long as it isn't also the
        # trailing slash, which we want to keep if present.
        if path and path[0] == '' and len(path) > 1:
            del path[0]
        path = host + path

    if scheme is None:
        return '/'.join(path)
    return scheme + '://' + '/'.join(path)
|
182 |
||
183 |
||
2018.5.46
by Andrew Bennetts
Fix ChrootTransportDecorator's clone to pass less surprising offsets to the decorated transport's clone. |
184 |
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The starting path; a single trailing '/' is ignored.
    :param args: Path segments to apply in order. A segment starting with
        '/' discards everything accumulated so far; '.' chunks are skipped
        and '..' chunks remove the previous chunk.
    :return: The joined path ('/' when only the root remains).
    :raises errors.InvalidURLJoin: If a '..' chunk would go above the root
        of an absolute path, or above the start of a relative path.
    """
    path = base.split('/')
    if len(path) > 1 and path[-1] == '':
        # If the path ends in a trailing /, remove it.
        path.pop()
    for arg in args:
        if arg.startswith('/'):
            path = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            elif chunk == '..':
                # [''] is the root of an absolute path; [] means a relative
                # path has already been consumed completely. Neither has a
                # parent, and popping from [] would raise IndexError.
                if not path or path == ['']:
                    raise errors.InvalidURLJoin('Cannot go above root',
                            base, args)
                path.pop()
            else:
                path.append(chunk)
    if path == ['']:
        return '/'
    else:
        return '/'.join(path)
|
215 |
||
216 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
217 |
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
|
218 |
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    :param url: A URL that must start with 'file:///'.
    :return: The unescaped local path, including its leading slash.
    :raises errors.InvalidURL: If url does not start with 'file:///'.
    """
    if url.startswith('file:///'):
        # Only 'file://' is removed, so the third slash remains as the
        # leading slash of the local path.
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(url, 'local urls must start with file:///')
|
224 |
||
225 |
||
226 |
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    The path is made absolute, normalized and then escaped, which also
    takes care of unicode characters and the like.

    :param path: A local (posix style) path.
    :return: The corresponding file:// URL.
    """
    # The posixpath based helpers are used directly so that this can be
    # tested on non-posix platforms as well.
    absolute_path = osutils._posix_abspath(path)
    normalized_path = _posix_normpath(absolute_path)
    return 'file://' + escape(normalized_path)
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
235 |
|
236 |
||
237 |
def _win32_local_path_from_url(url): |
|
1711.4.4
by John Arbash Meinel
Fix some broken tests because of stupid ntpath.abspath behavior |
238 |
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
|
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
239 |
if not url.startswith('file://'): |
240 |
raise errors.InvalidURL(url, 'local urls must start with file:///, ' |
|
241 |
'UNC path urls must start with file://') |
|
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
242 |
# We strip off all 3 slashes
|
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
243 |
win32_url = url[len('file:'):] |
2162.2.2
by Alexander Belchenko
Support for win32 UNC path (like: \\HOST\path) |
244 |
# check for UNC path: //HOST/path
|
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
245 |
if not win32_url.startswith('///'): |
2162.2.2
by Alexander Belchenko
Support for win32 UNC path (like: \\HOST\path) |
246 |
if (win32_url[2] == '/' |
247 |
or win32_url[3] in '|:'): |
|
248 |
raise errors.InvalidURL(url, 'Win32 UNC path urls' |
|
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
249 |
' have form file://HOST/path') |
2162.2.2
by Alexander Belchenko
Support for win32 UNC path (like: \\HOST\path) |
250 |
return unescape(win32_url) |
3503.1.2
by adwi2
Permits Windows to serve all paths on all drives. |
251 |
|
252 |
# allow empty paths so we can serve all roots
|
|
253 |
if win32_url == '///': |
|
254 |
return '/' |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
255 |
|
2162.2.2
by Alexander Belchenko
Support for win32 UNC path (like: \\HOST\path) |
256 |
# usual local path with drive letter
|
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
257 |
if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz' |
1711.4.4
by John Arbash Meinel
Fix some broken tests because of stupid ntpath.abspath behavior |
258 |
'ABCDEFGHIJKLMNOPQRSTUVWXYZ') |
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
259 |
or win32_url[4] not in '|:' |
260 |
or win32_url[5] != '/'): |
|
1711.4.4
by John Arbash Meinel
Fix some broken tests because of stupid ntpath.abspath behavior |
261 |
raise errors.InvalidURL(url, 'Win32 file urls start with' |
1711.4.8
by John Arbash Meinel
switch to prefering lowercase drive letters, since that matches os.getcwd() drive letters |
262 |
' file:///x:/, where x is a valid drive letter') |
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
263 |
return win32_url[3].upper() + u':' + unescape(win32_url[5:]) |
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
264 |
|
265 |
||
266 |
def _win32_local_path_to_url(path): |
|
1711.4.4
by John Arbash Meinel
Fix some broken tests because of stupid ntpath.abspath behavior |
267 |
"""Convert a local path like ./foo into a URL like file:///C:/path/to/foo
|
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
268 |
|
269 |
This also handles transforming escaping unicode characters, etc.
|
|
270 |
"""
|
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
271 |
# importing directly from ntpath allows us to test this
|
1711.4.4
by John Arbash Meinel
Fix some broken tests because of stupid ntpath.abspath behavior |
272 |
# on non-win32 platform
|
273 |
# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
|
|
274 |
# which actually strips trailing space characters.
|
|
275 |
# The worst part is that under linux ntpath.abspath has different
|
|
276 |
# semantics, since 'nt' is not an available module.
|
|
3503.1.1
by Adrian Wilkins
Add a couple of special cases to urlutils._win32_path_(from|to)_url |
277 |
if path == '/': |
3503.1.2
by adwi2
Permits Windows to serve all paths on all drives. |
278 |
return 'file:///' |
3503.1.1
by Adrian Wilkins
Add a couple of special cases to urlutils._win32_path_(from|to)_url |
279 |
|
2279.4.2
by Alexander Belchenko
Don't do normpath after abspath, because this function is called inside abspath |
280 |
win32_path = osutils._win32_abspath(path) |
2162.2.2
by Alexander Belchenko
Support for win32 UNC path (like: \\HOST\path) |
281 |
# check for UNC path \\HOST\path
|
282 |
if win32_path.startswith('//'): |
|
2162.2.7
by Alexander Belchenko
Win32 UNC path \\HOST\path mapped to URL file://HOST/path |
283 |
return 'file:' + escape(win32_path) |
3234.3.1
by Alexander Belchenko
ensure that local_path_to_url() always returns plain string, not unicode. |
284 |
return ('file:///' + str(win32_path[0].upper()) + ':' + |
285 |
escape(win32_path[2:])) |
|
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
286 |
|
287 |
||
288 |
# Platform dispatch: the posix implementations are the default and are
# replaced by the win32 ones below when running on win32.
local_path_to_url = _posix_local_path_to_url
local_path_from_url = _posix_local_path_from_url
# Length of the shortest absolute file url ('file:///' by default,
# 'file:///C:/' on win32).
MIN_ABS_FILEURL_LENGTH = len('file:///')
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url

    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH

# Splits 'scheme://path'. The scheme must be at least two characters long
# and may not contain ':' or '/'; 'path' is everything after the '://'.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
# Matches a single %XX hex escape.
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
302 |
||
303 |
||
304 |
def _unescape_safe_chars(matchobj):
    """re.sub callback that turns hex-escapes into plain characters (if safe).

    e.g. '%7E' will be converted to '~'.

    :param matchobj: A match whose whole text is a '%XX' hex escape.
    :return: The decoded character when it is listed in
        _url_dont_escape_characters, otherwise the escape itself in
        upper case.
    """
    escape_text = matchobj.group(0)
    # Drop the leading '%' and decode the two hex digits.
    decoded = chr(int(escape_text[1:], 16))
    if decoded in _url_dont_escape_characters:
        return decoded
    return escape_text.upper()
|
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
315 |
|
316 |
||
317 |
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: If a non-unicode url with a scheme contains
        a character that is not in _url_safe_characters.
    """
    m = _url_scheme_re.match(url)
    if not m:
        return local_path_to_url(url)
    scheme = m.group('scheme')
    path = m.group('path')
    if not isinstance(url, unicode):
        # A plain string must consist of safe characters only; nothing is
        # escaped here, only the existing hex escapes are normalized.
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                            ' safe characters (not %r)' % c)
        path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
        return str(scheme + '://' + path)

    # We have a unicode (hybrid) url
    path_chars = list(path)

    for i, char in enumerate(path_chars):
        if char not in _url_safe_characters:
            # Replace the character with the %XX escapes of its utf-8 bytes.
            path_chars[i] = ''.join(
                ['%%%02X' % ord(c) for c in char.encode('utf-8')])
    path = ''.join(path_chars)
    path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
    return str(scheme + '://' + path)
|
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
360 |
|
361 |
||
1685.1.70
by Wouter van Heyst
working on get_parent, set_parent and relative urls, broken |
362 |
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, other is returned unchanged; otherwise
    a relative path is returned. This assumes that no symlinks are part of
    the url.

    :param base: The URL the result is relative to.
    :param other: The URL to express relative to base.
    :return: other itself when either URL has no path separator after its
        scheme, when the scheme/host prefixes differ, or (on win32 file
        urls) when the drives differ. Otherwise a relative path, which is
        '.' when both refer to the same location.
    """
    _unused, base_slash = _find_scheme_and_separator(base)
    if base_slash is None:
        return other

    _unused, other_slash = _find_scheme_and_separator(other)
    if other_slash is None:
        return other

    # Everything in front of the first path slash is compared; this takes
    # care of differing schemes or hosts.
    base_prefix = base[:base_slash]
    other_prefix = other[:other_slash]
    if base_prefix != other_prefix:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # Paths on different logical drives cannot be made relative.
        if (base[base_slash+1:base_slash+3]
                != other[other_slash+1:other_slash+3]):
            return other

    base_path = base[base_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_slash+1:]

    # An empty path has no sections at all (rather than one empty section).
    if base_path:
        base_sections = base_path.split('/')
    else:
        base_sections = []
    if other_path:
        other_sections = other_path.split('/')
    else:
        other_sections = []

    # Count the leading sections that both paths have in common.
    common = 0
    for base_section, other_section in zip(base_sections, other_sections):
        if base_section != other_section:
            break
        common += 1

    # Go up once per remaining base section, then down into other.
    relative_sections = ['..'] * (len(base_sections) - common)
    relative_sections.extend(other_sections[common:])

    return "/".join(relative_sections) or "."
|
412 |
||
413 |
||
1711.2.43
by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms. |
414 |
def _win32_extract_drive_letter(url_base, path): |
415 |
"""On win32 the drive letter needs to be added to the url base."""
|
|
416 |
# Strip off the drive letter
|
|
417 |
# path is currently /C:/foo
|
|
418 |
if len(path) < 3 or path[2] not in ':|' or path[3] != '/': |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
419 |
raise errors.InvalidURL(url_base + path, |
1711.2.43
by John Arbash Meinel
Split out win32 specific code so that it can be tested on all platforms. |
420 |
'win32 file:/// paths need a drive letter') |
421 |
url_base += path[0:3] # file:// + /C: |
|
422 |
path = path[3:] # /foo |
|
423 |
return url_base, path |
|
424 |
||
425 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
426 |
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None and scheme_loc is not None:
        # Scheme with no path
        return url, ''

    if first_path_slash is None:
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path
    url_base = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter from the path into the base:
        # 'file://' and '/C:/foo' become 'file:///C:' and '/foo'.
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A lone '/' is the root and keeps its slash.
    strip_slash = (exclude_trailing_slash and len(path) > 1
                   and path.endswith('/'))
    if strip_slash:
        path = path[:-1]
    head, tail = _posix_split(path)
    return url_base + head, tail
|
464 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
465 |
|
1711.2.44
by John Arbash Meinel
Factor out another win32 special case and add platform independent tests for it. |
466 |
def _win32_strip_local_trailing_slash(url):
    """Drop the final character of a win32 file:// url unless it is too short.

    No check is made here that the final character really is a slash; the
    caller is expected to have verified that already.

    :param url: The url to shorten.
    :return: url unchanged when it is no longer than
        WIN32_MIN_ABS_FILEURL_LENGTH (presumably the length of a bare
        drive root -- verify against the constant's definition), otherwise
        url without its last character.
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        # Too short to have anything after the drive letter; keep as-is.
        return url
    return url[:-1]
|
472 |
||
473 |
||
1685.1.47
by John Arbash Meinel
s comes before u |
474 |
def strip_trailing_slash(url):
    """Remove a single trailing slash from url, but never from a root path.

    What counts as a 'root path' depends on the platform.
    URLs are assumed to be valid netloc urls of the form:
        scheme://host/path
    The slash separating the host from the path is never removed.
    Relative paths (those without a scheme) are handled as well.

    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    On win32, file:// urls ending in a slash are the one special case:
    they are handed to _win32_strip_local_trailing_slash instead.

    :param url: The url or relative path to process.
    :return: url, minus one trailing slash where that is permitted.
    """
    if not url.endswith('/'):
        # No trailing slash, so there is nothing to strip.
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    last_index = len(url) - 1
    # True when the trailing slash is the host/path separator itself (or
    # when no separator was found at all).
    only_root_slash = (first_path_slash is None
                       or first_path_slash == last_index)
    if scheme_loc is not None and only_root_slash:
        return url
    # Either a relative path (no scheme) or a url with a real path
    # component: chop off the final slash.
    return url[:-1]
513 |
||
1685.1.47
by John Arbash Meinel
s comes before u |
514 |
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
515 |
def unescape(url):
    """Convert a url-escaped relpath into a Unicode path.

    :param url: The escaped url; it must be convertible with str().
    :return: The unquoted url, decoded from utf-8.
    :raises errors.InvalidURL: if str(url) fails with a UnicodeError, or if
        the unquoted bytes are not valid utf-8.
    """
    # jam 20060427: URLs are supposed to be ASCII-only strings.  Handing a
    # unicode object to urllib.unquote yields a unicode string whose
    # contents are really utf-8 bytes; the final .decode would then try to
    # encode that unicode back to ASCII before decoding it as utf-8.
    # Forcing a plain str first avoids that.
    try:
        url = str(url)
    except UnicodeError:
        # The active exception is fetched explicitly so the except clause
        # needs no binding syntax.
        e = sys.exc_info()[1]
        raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))

    unquoted = urllib.unquote(url)
    try:
        return unquoted.decode('utf-8')
    except UnicodeError:
        e = sys.exc_info()[1]
        raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
|
538 |
||
539 |
||
540 |
# These are characters that if escaped, should stay that way
|
|
541 |
_no_decode_chars = ';/?:@&=+$,#' |
|
542 |
_no_decode_ords = [ord(c) for c in _no_decode_chars] |
|
3943.8.1
by Marius Kruger
remove all trailing whitespace from bzr source |
543 |
_no_decode_hex = (['%02x' % o for o in _no_decode_ords] |
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
544 |
+ ['%02X' % o for o in _no_decode_ords]) |
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
545 |
_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)] |
546 |
+ [('%02X' % o, chr(o)) for o in range(256)])) |
|
1685.1.51
by John Arbash Meinel
Working on getting normalize_url working. |
547 |
#These entries get mapped to themselves
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
548 |
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex) |
1685.1.51
by John Arbash Meinel
Working on getting normalize_url working. |
549 |
|
2208.4.1
by Andrew Bennetts
normalize_url should normalise escaping of unreserved characters, like '~'. |
550 |
# These characters shouldn't be percent-encoded, and it's always safe to
|
551 |
# unencode them if they are.
|
|
552 |
_url_dont_escape_characters = set( |
|
553 |
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha |
|
554 |
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha |
|
555 |
"0123456789" # Numbers |
|
556 |
"-._~" # Unreserved characters |
|
557 |
)
|
|
558 |
||
1685.1.51
by John Arbash Meinel
Working on getting normalize_url working. |
559 |
# These characters should not be escaped
|
2167.2.2
by Aaron Bentley
Update safe character list |
560 |
_url_safe_characters = set( |
561 |
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha |
|
562 |
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha |
|
563 |
"0123456789" # Numbers |
|
564 |
"_.-!~*'()" # Unreserved characters |
|
565 |
"/;?:@&=+$," # Reserved characters |
|
566 |
"%#" # Extra reserved characters |
|
567 |
)
|
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
568 |
|
1685.1.54
by John Arbash Meinel
url_for_display now makes sure output can be properly encoded. |
569 |
def unescape_for_display(url, encoding):
    """Undo as much url escaping as possible to get a readable path.

    file:// urls are converted to local paths.  For any other url, each
    '/'-separated section after the first is unescaped independently.

    A section that cannot be represented in the requested encoding, or
    whose escapes have to stay as escapes, is left exactly as it was.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
         specified encoding.
    :raises ValueError: if encoding is None.
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')

    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only a trial encode: the result is discarded, we just need
            # to know that it succeeds.
            path.encode(encoding)
        except UnicodeError:
            return url
        else:
            return path

    # Work section by section so that one undecodable section does not
    # prevent the others from being displayed nicely.  Section 0 is never
    # touched.
    segments = url.split('/')
    for seg_index in xrange(1, len(segments)):
        pieces = segments[seg_index].split('%')
        # pieces[0] precedes the first '%'; every later piece starts with
        # the two characters that followed a '%'.
        for piece_index in xrange(1, len(pieces)):
            item = pieces[piece_index]
            hex_code = item[:2]
            remainder = item[2:]
            try:
                pieces[piece_index] = _hex_display_map[hex_code] + remainder
            except KeyError:
                # Not a recognised escape: put back the percent symbol
                pieces[piece_index] = '%' + item
            except UnicodeDecodeError:
                # Presumably raised when the mapped byte cannot be joined
                # to a unicode remainder; use the unicode character instead.
                pieces[piece_index] = unichr(int(hex_code, 16)) + remainder
        unescaped = ''.join(pieces)

        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # The unescaped section is not valid utf-8: leave it alone.
            continue
        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # The section cannot be shown in the target encoding: leave
            # it alone.
            continue
        # Both checks passed, so show the decoded form.
        segments[seg_index] = decoded
    return u'/'.join(segments)
|
2512.4.1
by Ian Clatworthy
Fixes #115491 - 'branch lp:projname' now creates ./projname as exected |
625 |
|
626 |
||
627 |
def derive_to_location(from_location):
    """Work out a default TO_LOCATION from a FROM_LOCATION.

    Usually the result is the last path component, e.g.
    http://foo/bar => bar (trailing '/' and '\\' characters are ignored).

    Some logical locations contain no '/' or path separator at all.  For
    those, everything after the first ':' is used, so lp:foo-bar => foo-bar.
    The same rule covers a Windows drive given without a path, e.g.
    c:foo-bar => foo-bar.

    When there is no '/', no path separator and no ':' past the first
    character, from_location itself is returned.

    :param from_location: The source location string.
    :return: The derived destination name.
    """
    has_separator = "/" in from_location or os.sep in from_location
    if has_separator:
        return os.path.basename(from_location.rstrip("/\\"))
    colon = from_location.find(":")
    if colon > 0:
        # Keep only what follows the scheme (or drive) indicator.
        return from_location[colon + 1:]
    return from_location
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
646 |
|
3242.3.35
by Aaron Bentley
Cleanups and documentation |
647 |
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
648 |
def _is_absolute(url):
    """Return True if joining url onto the base '/foo' gives back url itself.

    The assumption (verify against osutils.pathjoin) is that joining an
    absolute path onto a base discards the base.
    """
    rejoined = osutils.pathjoin('/foo', url)
    return rejoined == url
|
650 |
||
3242.3.35
by Aaron Bentley
Cleanups and documentation |
651 |
|
3242.3.26
by Aaron Bentley
Implement rebase_url |
652 |
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as a path relative to new_base.

    The result is a relative path.  A url that has a scheme, or that is an
    absolute path, is returned unchanged.

    :param url: The path to convert.
    :param old_base: The base URL that url is currently relative to.
    :param new_base: The base URL the result should be relative to.
    :return: url unchanged, or a path relative to new_base.
    :raises errors.InvalidRebaseURLs: if the first two urlparse components
        (scheme and network location) of the two bases differ.
    """
    scheme, _separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url

    old_parts = urlparse.urlparse(old_base)
    new_parts = urlparse.urlparse(new_base)
    if old_parts[:2] != new_parts[:2]:
        raise errors.InvalidRebaseURLs(old_base, new_base)

    # Index 2 of a urlparse result is the path component.
    old_path = old_parts[2]
    new_path = new_parts[2]
    target_path = join(old_path, url)
    return determine_relative_path(new_path, target_path)
3242.3.26
by Aaron Bentley
Implement rebase_url |
669 |
|
670 |
||
671 |
def determine_relative_path(from_path, to_path):
    """Compute a relative path that leads from from_path to to_path.

    Both paths are split with osutils.splitpath.  Leading segments the two
    have in common are dropped, each remaining from_path segment becomes
    a '..', and the remaining to_path segments are appended.

    :param from_path: The starting path.
    :param to_path: The destination path.
    :return: The joined relative path, or '.' when no segments remain.
    """
    from_segments = osutils.splitpath(from_path)
    to_segments = osutils.splitpath(to_path)

    # Length of the shared leading run of segments.
    common = 0
    for from_element, to_element in zip(from_segments, to_segments):
        if from_element != to_element:
            break
        common += 1

    climb = ['..'] * len(from_segments[common:])
    segments = climb + to_segments[common:]
    if not segments:
        return '.'
    return osutils.pathjoin(*segments)
|
3873.3.1
by Martin Pool
Move Transport._split_url to urlutils, and ad a simple test |
688 |
|
689 |
||
690 |
||
691 |
def parse_url(url):
    """Extract the server address, the credentials and the path from the url.

    user, password, host and path should be quoted if they contain reserved
    chars.

    The params and query components that urlparse splits off are not part
    of the result.  An empty port (as in 'scheme://host:/path') is treated
    the same as an absent one.

    :param url: a quoted url

    :return: (scheme, user, password, host, port, path) tuple; user,
        password, host and path are unquoted, port is an int or None, and
        any of user, password and port is None when absent from the url.
    :raises errors.InvalidURL: if url is a unicode object, or if a
        non-empty port is not a valid integer.
    """
    if isinstance(url, unicode):
        raise errors.InvalidURL('should be ascii:\n%r' % url)
    # NOTE(review): on a byte string this presumably acts as an ASCII check
    # (implicit decode with the default codec) -- confirm.
    url = url.encode('utf-8')
    (scheme, netloc, path, params,
     query, fragment) = urlparse.urlparse(url, allow_fragments=False)
    user = password = host = port = None
    if '@' in netloc:
        # Split on the last '@' so a quoted user or password may contain one.
        user, host = netloc.rsplit('@', 1)
        if ':' in user:
            user, password = user.split(':', 1)
            password = urllib.unquote(password)
        user = urllib.unquote(user)
    else:
        host = netloc

    # A ':' means a port follows, unless the whole host is a bracketed IPv6
    # literal, in which case the colons belong to the address.
    if ':' in host and not (host[0] == '[' and host[-1] == ']'):
        host, port = host.rsplit(':', 1)
        if port:
            try:
                port = int(port)
            except ValueError:
                raise errors.InvalidURL('invalid port number %s in url:\n%s' %
                                        (port, url))
        else:
            # 'host:' with nothing after the colon: no port was given.
            port = None
    if host != "" and host[0] == '[' and host[-1] == ']':
        # IPv6 literal: strip the surrounding brackets.
        host = host[1:-1]

    host = urllib.unquote(host)
    path = urllib.unquote(path)

    return (scheme, user, password, host, port, path)