60
61
return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
64
# Private copies of quote and unquote, copied from Python's
65
# urllib module because urllib unconditionally imports socket, which imports
68
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
69
'abcdefghijklmnopqrstuvwxyz'
72
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
73
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
77
def quote(s, safe='/'):
78
"""quote('abc def') -> 'abc%20def'
80
Each part of a URL, e.g. the path info, the query, etc., has a
81
different set of reserved characters that must be quoted.
83
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
84
the following reserved characters.
86
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
89
Each of these characters is reserved in some component of a URL,
90
but not necessarily in all of them.
92
By default, the quote function is intended for quoting the path
93
section of a URL. Thus, it will not encode '/'. This character
94
is reserved, but in typical usage the quote function is being
95
called on a path where the existing slash characters are used as
101
raise TypeError('None object cannot be quoted')
103
cachekey = (safe, always_safe)
105
(quoter, safe) = _safe_quoters[cachekey]
107
safe_map = _safe_map.copy()
108
safe_map.update([(c, c) for c in safe])
109
quoter = safe_map.__getitem__
110
safe = always_safe + safe
111
_safe_quoters[cachekey] = (quoter, safe)
112
if not s.rstrip(safe):
114
return ''.join(map(quoter, s))
117
_hexdig = '0123456789ABCDEFabcdef'
118
_hextochr = dict((a + b, chr(int(a + b, 16)))
119
for a in _hexdig for b in _hexdig)
122
"""unquote('abc%20def') -> 'abc def'."""
130
s += _hextochr[item[:2]] + item[2:]
133
except UnicodeDecodeError:
134
s += unichr(int(item[:2], 16)) + item[2:]
63
138
def escape(relpath):
    """Escape relpath to be a valid url.

    :param relpath: Path to escape. A unicode object is encoded to UTF-8
        before it is quoted.
    :return: The quoted path as a plain str; '/' and '~' are passed to
        quote() as safe characters and so are left unquoted.
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    # After quoting and encoding, the path should be perfectly
    # safe as a plain ASCII string, str() just enforces this.
    # Use this module's private quote() rather than urllib.quote so that
    # escaping does not depend on importing urllib.
    return str(quote(relpath, safe='/~'))
72
147
def file_relpath(base, path):
439
516
:param url: A relative or absolute URL
440
517
:return: (url, subsegments)
442
(parent_url, child_dir) = split(url)
443
subsegments = child_dir.split(",")
444
if len(subsegments) == 1:
519
# GZ 2011-11-18: Dodgy removing the terminal slash like this, function
520
# operates on urls not url+segments, and Transport classes
521
# should not be blindly adding slashes in the first place.
522
lurl = strip_trailing_slash(url)
523
# Segments begin at first comma after last forward slash, if one exists
524
segment_start = lurl.find(",", lurl.rfind("/")+1)
525
if segment_start == -1:
446
return (join(parent_url, subsegments[0]), subsegments[1:])
527
return (lurl[:segment_start], lurl[segment_start+1:].split(","))
449
530
def split_segment_parameters(url):
729
810
return osutils.pathjoin(*segments)
816
def __init__(self, scheme, quoted_user, quoted_password, quoted_host,
819
self.quoted_host = quoted_host
820
self.host = unquote(self.quoted_host)
821
self.quoted_user = quoted_user
822
if self.quoted_user is not None:
823
self.user = unquote(self.quoted_user)
826
self.quoted_password = quoted_password
827
if self.quoted_password is not None:
828
self.password = unquote(self.quoted_password)
832
self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
833
self.path = unquote(self.quoted_path)
835
def __eq__(self, other):
    """Return True when other is the same kind of URL with equal parts.

    Compares the scheme, host, port, user, password and path attributes
    (the unquoted forms). The port takes part in the comparison so that
    two URLs differing only in their port number are not considered
    equal.

    :param other: Object to compare against.
    :return: True if other is an instance of this object's class and
        every compared attribute matches, False otherwise.
    """
    return (isinstance(other, self.__class__) and
            self.scheme == other.scheme and
            self.host == other.host and
            self.port == other.port and
            self.user == other.user and
            self.password == other.password and
            self.path == other.path)
844
return "<%s(%r, %r, %r, %r, %r, %r)>" % (
845
self.__class__.__name__,
846
self.scheme, self.quoted_user, self.quoted_password,
847
self.quoted_host, self.port, self.quoted_path)
850
def from_string(cls, url):
851
"""Create a URL object from a string.
853
:param url: URL as bytestring
855
if isinstance(url, unicode):
856
raise errors.InvalidURL('should be ascii:\n%r' % url)
857
url = url.encode('utf-8')
858
(scheme, netloc, path, params,
859
query, fragment) = urlparse.urlparse(url, allow_fragments=False)
860
user = password = host = port = None
862
user, host = netloc.rsplit('@', 1)
864
user, password = user.split(':', 1)
868
if ':' in host and not (host[0] == '[' and host[-1] == ']'):
870
host, port = host.rsplit(':',1)
874
raise errors.InvalidURL('invalid port number %s in url:\n%s' %
876
if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
879
return cls(scheme, user, password, host, port, path)
882
netloc = self.quoted_host
884
netloc = "[%s]" % netloc
885
if self.quoted_user is not None:
886
# Note that we don't put the password back even if we
887
# have one so that it doesn't get accidentally
889
netloc = '%s@%s' % (self.quoted_user, netloc)
890
if self.port is not None:
891
netloc = '%s:%d' % (netloc, self.port)
892
return urlparse.urlunparse(
893
(self.scheme, netloc, self.quoted_path, None, None, None))
896
def _combine_paths(base_path, relpath):
897
"""Transform a Transport-relative path to a remote absolute path.
899
This does not handle substitution of ~ but does handle '..' and '.'
904
t._combine_paths('/home/sarah', 'project/foo')
905
=> '/home/sarah/project/foo'
906
t._combine_paths('/home/sarah', '../../etc')
908
t._combine_paths('/home/sarah', '/etc')
911
:param base_path: base path
912
:param relpath: relative url string for relative part of remote path.
913
:return: urlencoded string for final path.
915
if not isinstance(relpath, str):
916
raise errors.InvalidURL(relpath)
917
relpath = _url_hex_escapes_re.sub(_unescape_safe_chars, relpath)
918
if relpath.startswith('/'):
921
base_parts = base_path.split('/')
922
if len(base_parts) > 0 and base_parts[-1] == '':
923
base_parts = base_parts[:-1]
924
for p in relpath.split('/'):
926
if len(base_parts) == 0:
927
# In most filesystems, a request for the parent
928
# of root, just returns root.
935
path = '/'.join(base_parts)
936
if not path.startswith('/'):
940
def clone(self, offset=None):
941
"""Return a new URL for a path relative to this URL.
943
:param offset: A relative path, already urlencoded
944
:return: `URL` instance
946
if offset is not None:
947
relative = unescape(offset).encode('utf-8')
948
path = self._combine_paths(self.path, relative)
949
path = quote(path, safe="/~")
951
path = self.quoted_path
952
return self.__class__(self.scheme, self.quoted_user,
953
self.quoted_password, self.quoted_host, self.port,
733
957
def parse_url(url):
734
958
"""Extract the server address, the credentials and the path from the url.
739
963
:param url: a quoted url
741
964
:return: (scheme, user, password, host, port, path) tuple, all fields
744
if isinstance(url, unicode):
745
raise errors.InvalidURL('should be ascii:\n%r' % url)
746
url = url.encode('utf-8')
747
(scheme, netloc, path, params,
748
query, fragment) = urlparse.urlparse(url, allow_fragments=False)
749
user = password = host = port = None
751
user, host = netloc.rsplit('@', 1)
753
user, password = user.split(':', 1)
754
password = urllib.unquote(password)
755
user = urllib.unquote(user)
759
if ':' in host and not (host[0] == '[' and host[-1] == ']'): #there *is* port
760
host, port = host.rsplit(':',1)
764
raise errors.InvalidURL('invalid port number %s in url:\n%s' %
766
if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
769
host = urllib.unquote(host)
770
path = urllib.unquote(path)
772
return (scheme, user, password, host, port, path)
967
parsed_url = URL.from_string(url)
968
return (parsed_url.scheme, parsed_url.user, parsed_url.password,
969
parsed_url.host, parsed_url.port, parsed_url.path)