~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: Jelmer Vernooij
Date: 2011-12-19 10:58:39 UTC
mfrom: (6383 +trunk)
mto: This revision was merged to the branch mainline in revision 6386.
Revision ID: jelmer@canonical.com-20111219105839-uji05ck4rkm1mj4j

Merge bzr.dev.

files modified:
bzrlib/filters/__init__.py

bzrlib/filters/eol.py

bzrlib/mail_client.py

bzrlib/osutils.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/weave_fmt/repository.py

bzrlib/smart/medium.py

bzrlib/tests/http_server.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_urlutils.py

bzrlib/transport/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

doc/en/release-notes/bzr-2.5.txt

Show diffs side-by-side

added added

removed removed

bzrlib/urlutils.py

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from posixpath import split as _posix_split

import urllib

import urlparse

from bzrlib import (

return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]

# Private copies of quote and unquote, copied from Python's
# urllib module because urllib unconditionally imports socket, which imports
# ssl.

# Characters that never need percent-encoding, whatever ``safe`` is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# Maps every single-byte character to itself (when always safe) or to its
# '%XX' escape.  chr(i) yields the same 256 one-character keys that
# iterating str(bytearray(xrange(256))) does, without the bytearray
# round trip.
_safe_map = {}
for i in range(256):
    c = chr(i)
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)

# Cache of (quoter, full_safe_set) pairs, keyed by (safe, always_safe).
_safe_quoters = {}


def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    :param s: The string to quote.
    :param safe: Extra characters, beyond ``always_safe``, that are left
        unescaped.
    :return: ``s`` with every other character replaced by its '%XX'
        escape.  Empty (falsy, non-None) input is returned unchanged.
    :raises TypeError: If ``s`` is None.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    try:
        (quoter, safe) = _safe_quoters[cachekey]
    except KeyError:
        # First use of this ``safe`` set: build its lookup table once and
        # remember it together with the complete set of unescaped chars.
        safe_map = _safe_map.copy()
        safe_map.update([(c, c) for c in safe])
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    # If stripping safe characters leaves nothing, there is nothing to
    # escape and the input can be returned as is.
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))

115

116

117

_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-hex-digit string (in any mix of case) to the character
# with that code.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)


def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    :param s: The string to unquote.
    :return: ``s`` with each valid '%XX' escape replaced by the character
        it encodes.  A '%' that is not followed by two hex digits is kept
        verbatim.
    """
    res = s.split('%')
    # fastpath
    if len(res) == 1:
        return s
    s = res[0]
    for item in res[1:]:
        try:
            s += _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: put back the '%' that split() removed.
            s += '%' + item
        except UnicodeDecodeError:
            # Concatenating the decoded character failed to decode; append
            # it as a unicode character instead.
            s += unichr(int(item[:2], 16)) + item[2:]
    return s

136

137

138

def escape(relpath):
    """Escape relpath to be a valid url.

    :param relpath: The relative path to escape.  A unicode path is
        encoded as UTF-8 before quoting.
    :return: The quoted path as a plain str, with '/' and '~' left
        unescaped.
    """
    if isinstance(relpath, unicode):
        relpath = relpath.encode('utf-8')
    # After quoting and encoding, the path should be perfectly
    # safe as a plain ASCII string, str() just enforces this
    return str(quote(relpath, safe='/~'))

145

146

147

def file_relpath(base, path):

568

641

This returns a Unicode path from a URL

569

642

"""

570

643

# jam 20060427 URLs are supposed to be ASCII only strings

571

# If they are passed in as unicode, urllib.unquote

644

# If they are passed in as unicode, unquote

572

645

# will return a UNICODE string, which actually contains

573

646

# utf-8 bytes. So we have to ensure that they are

574

647

# plain ASCII strings, or the final .decode will

579

652

except UnicodeError, e:

580

653

raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))

581

654

582

unquoted = urllib.unquote(url)

655

unquoted = unquote(url)

583

656

try:

584

657

unicode_path = unquoted.decode('utf-8')

585

658

except UnicodeError, e:

744

817

port, quoted_path):

745

818

self.scheme = scheme

746

819

self.quoted_host = quoted_host

747

self.host = urllib.unquote(self.quoted_host)

820

self.host = unquote(self.quoted_host)

748

821

self.quoted_user = quoted_user

749

822

if self.quoted_user is not None:

750

self.user = urllib.unquote(self.quoted_user)

823

self.user = unquote(self.quoted_user)

751

824

else:

752

825

self.user = None

753

826

self.quoted_password = quoted_password

754

827

if self.quoted_password is not None:

755

self.password = urllib.unquote(self.quoted_password)

828

self.password = unquote(self.quoted_password)

756

829

else:

757

830

self.password = None

758

831

self.port = port

759

832

self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)

760

self.path = urllib.unquote(self.quoted_path)

833

self.path = unquote(self.quoted_path)

761

834

762

835

def __eq__(self, other):

763

836

return (isinstance(other, self.__class__) and

873

946

if offset is not None:

874

947

relative = unescape(offset).encode('utf-8')

875

948

path = self._combine_paths(self.path, relative)

876

path = urllib.quote(path, safe="/~")

949

path = quote(path, safe="/~")

877

950

else:

878

951

path = self.quoted_path

879

952

return self.__class__(self.scheme, self.quoted_user,

Older »