~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: Patch Queue Manager
Date: 2011-12-19 01:56:41 UTC
mfrom: (6379.4.8 urlutils-quote)
Revision ID: pqm@pqm.ubuntu.com-20111219015641-k7fyhyf6rfgk93m2

(jelmer) Include quote() and unquote() directly in bzrlib.urlutils. (Jelmer
Vernooij)

files modified:
bzrlib/mail_client.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/weave_fmt/repository.py

bzrlib/smart/medium.py

bzrlib/tests/http_server.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_urlutils.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

doc/en/release-notes/bzr-2.5.txt

Show diffs side-by-side

added added

removed removed

bzrlib/urlutils.py

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from posixpath import split as _posix_split

import urllib

import urlparse

from bzrlib import (

return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]

# Private copies of quote and unquote, copied from Python's

# urllib module because urllib unconditionally imports socket, which imports

# ssl.

# Characters that quote() never percent-encodes, whatever ``safe`` is.
always_safe = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
    '0123456789'
    '_.-'
)

# Maps each of the 256 single-byte characters either to itself (when it is
# in always_safe) or to its upper-case '%XX' escape.
_safe_map = {}
for i in range(256):
    c = chr(i)
    if i < 128 and c in always_safe:
        _safe_map[c] = c
    else:
        _safe_map[c] = '%{0:02X}'.format(i)

# Cache filled by quote(): (safe, always_safe) -> (quoter, safe characters).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Percent-encode the characters of ``s`` that are neither in
    ``always_safe`` nor in ``safe``.

    Every component of a URL (path, query, ...) has its own set of
    reserved characters that must be quoted.  RFC 2396 Uniform Resource
    Identifiers (URI): Generic Syntax lists these reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them.

    The default ``safe`` value targets the path section of a URL, so
    '/' is left alone: it is a reserved character, but when quoting a
    path the slashes already present are normally meant as separators.

    :param s: the string to quote.
    :param safe: additional characters that must not be quoted.
    :return: the quoted string; ``s`` itself when it is empty or made
        only of safe characters.
    :raises TypeError: if ``s`` is None.
    """
    if s is None:
        raise TypeError('None object cannot be quoted')
    if not s:
        # fastpath: nothing to quote in an empty string
        return s

    key = (safe, always_safe)
    if key in _safe_quoters:
        quoter, safe = _safe_quoters[key]
    else:
        # First call with this ``safe`` set: build its lookup table once
        # and remember it together with the full set of safe characters.
        table = _safe_map.copy()
        for ch in safe:
            table[ch] = ch
        quoter = table.__getitem__
        safe = always_safe + safe
        _safe_quoters[key] = (quoter, safe)

    # Stripping safe characters leaves nothing when every character is safe.
    if not s.rstrip(safe):
        return s
    return ''.join([quoter(ch) for ch in s])

113

114

115

# Hex digits accepted in a '%XX' escape, in upper and lower case.
_hexdig = '0123456789ABCDEFabcdef'

# Maps every two-digit hex string (any mix of case) to the character
# with that code.
_hextochr = {}
for _hi in _hexdig:
    for _lo in _hexdig:
        _hextochr[_hi + _lo] = chr(int(_hi + _lo, 16))
# Keep the module namespace the same as before: only the two tables stay.
del _hi, _lo

118

119

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Replace each '%XX' escape in ``s`` with the character it encodes.
    A '%' that is not followed by two hex digits is kept verbatim.

    :param s: the quoted string.
    :return: the unquoted string; ``s`` itself when it contains no '%'.
    """
    pieces = s.split('%')
    if len(pieces) == 1:
        # fastpath: no '%' anywhere, nothing to decode
        return s
    result = pieces[0]
    for chunk in pieces[1:]:
        code = chunk[:2]
        tail = chunk[2:]
        try:
            result += _hextochr[code] + tail
        except KeyError:
            # Not a valid escape: put the '%' back and keep the text.
            result += '%' + chunk
        except UnicodeDecodeError:
            # The concatenation above could not be decoded; append the
            # escape as a unicode character instead.
            result += unichr(int(code, 16)) + tail
    return result

134

135

136

def escape(relpath):
    """Escape relpath to be a valid url.

    :param relpath: the path to escape; a unicode path is encoded as
        UTF-8 before quoting.
    :return: a plain str in which every character other than those in
        ``always_safe``, '/' and '~' is percent-encoded.
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    # After quoting and encoding, the path should be perfectly
    # safe as a plain ASCII string, str() just enforces this.
    # Use this module's own quote() rather than urllib.quote: the private
    # copy exists so that urllib (and with it socket and ssl) is not needed.
    return str(quote(relpath, safe='/~'))

143

144

145

def file_relpath(base, path):

566

639

This returns a Unicode path from a URL

567

640

"""

568

641

# jam 20060427 URLs are supposed to be ASCII only strings

569

# If they are passed in as unicode, urllib.unquote

642

# If they are passed in as unicode, unquote

570

643

# will return a UNICODE string, which actually contains

571

644

# utf-8 bytes. So we have to ensure that they are

572

645

# plain ASCII strings, or the final .decode will

577

650

except UnicodeError, e:

578

651

raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))

579

652

580

unquoted = urllib.unquote(url)

653

unquoted = unquote(url)

581

654

try:

582

655

unicode_path = unquoted.decode('utf-8')

583

656

except UnicodeError, e:

742

815

port, quoted_path):

743

816

self.scheme = scheme

744

817

self.quoted_host = quoted_host

745

self.host = urllib.unquote(self.quoted_host)

818

self.host = unquote(self.quoted_host)

746

819

self.quoted_user = quoted_user

747

820

if self.quoted_user is not None:

748

self.user = urllib.unquote(self.quoted_user)

821

self.user = unquote(self.quoted_user)

749

822

else:

750

823

self.user = None

751

824

self.quoted_password = quoted_password

752

825

if self.quoted_password is not None:

753

self.password = urllib.unquote(self.quoted_password)

826

self.password = unquote(self.quoted_password)

754

827

else:

755

828

self.password = None

756

829

self.port = port

757

830

self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)

758

self.path = urllib.unquote(self.quoted_path)

831

self.path = unquote(self.quoted_path)

759

832

760

833

def __eq__(self, other):

761

834

return (isinstance(other, self.__class__) and

871

944

if offset is not None:

872

945

relative = unescape(offset).encode('utf-8')

873

946

path = self._combine_paths(self.path, relative)

874

path = urllib.quote(path, safe="/~")

947

path = quote(path, safe="/~")

875

948

else:

876

949

path = self.quoted_path

877

950

return self.__class__(self.scheme, self.quoted_user,

Older »