60
61
return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
64
# Private copies of quote and unquote, copied from Python's
65
# urllib module because urllib unconditionally imports socket, which imports
68
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
69
'abcdefghijklmnopqrstuvwxyz'
72
# Precompute the quoted form of every possible byte value: an ASCII
# character listed in always_safe maps to itself, any other byte maps to
# its upper-case '%XX' escape.
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
77
def quote(s, safe='/'):
78
"""quote('abc def') -> 'abc%20def'
80
Each part of a URL, e.g. the path info, the query, etc., has a
81
different set of reserved characters that must be quoted.
83
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
84
the following reserved characters.
86
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
89
Each of these characters is reserved in some component of a URL,
90
but not necessarily in all of them.
92
By default, the quote function is intended for quoting the path
93
section of a URL. Thus, it will not encode '/'. This character
94
is reserved, but in typical usage the quote function is being
95
called on a path where the existing slash characters are used as
101
raise TypeError('None object cannot be quoted')
103
cachekey = (safe, always_safe)
105
(quoter, safe) = _safe_quoters[cachekey]
107
safe_map = _safe_map.copy()
108
safe_map.update([(c, c) for c in safe])
109
quoter = safe_map.__getitem__
110
safe = always_safe + safe
111
_safe_quoters[cachekey] = (quoter, safe)
112
if not s.rstrip(safe):
114
return ''.join(map(quoter, s))
117
# Every character that may appear in a two-digit hex escape, in both cases.
_hexdig = '0123456789ABCDEFabcdef'
# Lookup table from each two-character hex pair (any mix of upper and lower
# case) to the character with that ordinal.
_hextochr = dict((hi + lo, chr(int(hi + lo, 16)))
                 for hi in _hexdig for lo in _hexdig)
122
"""unquote('abc%20def') -> 'abc def'."""
130
s += _hextochr[item[:2]] + item[2:]
133
except UnicodeDecodeError:
134
s += unichr(int(item[:2], 16)) + item[2:]
63
138
def escape(relpath):
    """Escape relpath to be a valid url.

    Unicode input is first encoded to UTF-8; the result is then
    percent-quoted with this module's own quote(), leaving '/' and '~'
    unescaped.

    :param relpath: The relative path to escape (str or unicode).
    :return: The quoted path as a plain str.
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    # After quoting and encoding, the path should be perfectly
    # safe as a plain ASCII string, str() just enforces this
    return str(quote(relpath, safe='/~'))
72
147
def file_relpath(base, path):
78
153
raise ValueError('Length of base (%r) must equal or'
79
154
' exceed the platform minimum url length (which is %d)' %
80
155
(base, MIN_ABS_FILEURL_LENGTH))
81
base = local_path_from_url(base)
82
path = local_path_from_url(path)
156
base = osutils.normpath(local_path_from_url(base))
157
path = osutils.normpath(local_path_from_url(path))
83
158
return escape(osutils.relpath(base, path))
388
462
"""On win32 the drive letter needs to be added to the url base."""
389
463
# Strip off the drive letter
390
464
# path is currently /C:/foo
391
if len(path) < 3 or path[2] not in ':|' or path[3] != '/':
465
if len(path) < 4 or path[2] not in ':|' or path[3] != '/':
392
466
raise errors.InvalidURL(url_base + path,
393
467
'win32 file:/// paths need a drive letter')
394
468
url_base += path[0:3] # file:// + /C:
442
516
:param url: A relative or absolute URL
443
517
:return: (url, subsegments)
445
(parent_url, child_dir) = split(url)
446
subsegments = child_dir.split(",")
447
if len(subsegments) == 1:
519
# GZ 2011-11-18: Dodgy removing the terminal slash like this, function
520
# operates on urls not url+segments, and Transport classes
521
# should not be blindly adding slashes in the first place.
522
lurl = strip_trailing_slash(url)
523
# Segments begin at first comma after last forward slash, if one exists
524
segment_start = lurl.find(",", lurl.rfind("/")+1)
525
if segment_start == -1:
449
return (join(parent_url, subsegments[0]), subsegments[1:])
527
return (lurl[:segment_start], lurl[segment_start+1:].split(","))
452
530
def split_segment_parameters(url):
563
641
This returns a Unicode path from a URL
565
643
# jam 20060427 URLs are supposed to be ASCII only strings
566
# If they are passed in as unicode, urllib.unquote
644
# If they are passed in as unicode, unquote
567
645
# will return a UNICODE string, which actually contains
568
646
# utf-8 bytes. So we have to ensure that they are
569
647
# plain ASCII strings, or the final .decode will
739
817
port, quoted_path):
740
818
self.scheme = scheme
741
819
self.quoted_host = quoted_host
742
self.host = urllib.unquote(self.quoted_host)
820
self.host = unquote(self.quoted_host)
743
821
self.quoted_user = quoted_user
744
822
if self.quoted_user is not None:
745
self.user = urllib.unquote(self.quoted_user)
823
self.user = unquote(self.quoted_user)
748
826
self.quoted_password = quoted_password
749
827
if self.quoted_password is not None:
750
self.password = urllib.unquote(self.quoted_password)
828
self.password = unquote(self.quoted_password)
752
830
self.password = None
754
self.quoted_path = quoted_path
755
self.path = urllib.unquote(self.quoted_path)
832
self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
833
self.path = unquote(self.quoted_path)
757
835
def __eq__(self, other):
758
836
return (isinstance(other, self.__class__) and