60
59
return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
62
# Private copies of quote and unquote, copied from Python's
63
# urllib module because urllib unconditionally imports socket, which imports
66
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
67
'abcdefghijklmnopqrstuvwxyz'
70
# Fill the quoting table: one entry per byte value 0-255. A byte maps to
# itself when it is ASCII and listed in always_safe; every other byte maps
# to its two-digit upper-case '%XX' escape.
for i, c in enumerate(str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
75
def quote(s, safe='/'):
76
"""quote('abc def') -> 'abc%20def'
78
Each part of a URL, e.g. the path info, the query, etc., has a
79
different set of reserved characters that must be quoted.
81
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
82
the following reserved characters.
84
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
87
Each of these characters is reserved in some component of a URL,
88
but not necessarily in all of them.
90
By default, the quote function is intended for quoting the path
91
section of a URL. Thus, it will not encode '/'. This character
92
is reserved, but in typical usage the quote function is being
93
called on a path where the existing slash characters are used as
99
raise TypeError('None object cannot be quoted')
101
cachekey = (safe, always_safe)
103
(quoter, safe) = _safe_quoters[cachekey]
105
safe_map = _safe_map.copy()
106
safe_map.update([(c, c) for c in safe])
107
quoter = safe_map.__getitem__
108
safe = always_safe + safe
109
_safe_quoters[cachekey] = (quoter, safe)
110
if not s.rstrip(safe):
112
return ''.join(map(quoter, s))
115
_hexdig = '0123456789ABCDEFabcdef'
116
_hextochr = dict((a + b, chr(int(a + b, 16)))
117
for a in _hexdig for b in _hexdig)
120
"""unquote('abc%20def') -> 'abc def'."""
128
s += _hextochr[item[:2]] + item[2:]
131
except UnicodeDecodeError:
132
s += unichr(int(item[:2], 16)) + item[2:]
63
136
def escape(relpath):
    """Escape relpath to be a valid url.

    :param relpath: The relative path to escape. A unicode string is
        encoded as UTF-8 first; any other value is passed to quote()
        unchanged.
    :return: The result of quote() with '/' and '~' treated as safe
        (left unescaped), coerced to a plain str.
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    # After quoting and encoding, the path should be perfectly
    # safe as a plain ASCII string, str() just enforces this.
    # This deliberately calls the module's private quote() copy rather than
    # urllib.quote, so escaping does not depend on the urllib module.
    return str(quote(relpath, safe='/~'))
72
145
def file_relpath(base, path):
566
639
This returns a Unicode path from a URL
568
641
# jam 20060427 URLs are supposed to be ASCII only strings
569
# If they are passed in as unicode, urllib.unquote
642
# If they are passed in as unicode, unquote
570
643
# will return a UNICODE string, which actually contains
571
644
# utf-8 bytes. So we have to ensure that they are
572
645
# plain ASCII strings, or the final .decode will
742
815
port, quoted_path):
743
816
self.scheme = scheme
744
817
self.quoted_host = quoted_host
745
self.host = urllib.unquote(self.quoted_host)
818
self.host = unquote(self.quoted_host)
746
819
self.quoted_user = quoted_user
747
820
if self.quoted_user is not None:
748
self.user = urllib.unquote(self.quoted_user)
821
self.user = unquote(self.quoted_user)
751
824
self.quoted_password = quoted_password
752
825
if self.quoted_password is not None:
753
self.password = urllib.unquote(self.quoted_password)
826
self.password = unquote(self.quoted_password)
755
828
self.password = None
757
830
self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
758
self.path = urllib.unquote(self.quoted_path)
831
self.path = unquote(self.quoted_path)
760
833
def __eq__(self, other):
761
834
return (isinstance(other, self.__class__) and