~bzr-pqm/bzr/bzr.dev : contents of bzrlib/urlutils.py at revision 5452

~bzr-pqm/bzr/bzr.dev : (revision 5452)

4763.2.4 by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry.	1	# Copyright (C) 2006-2010 Canonical Ltd
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	16
	17	"""A collection of function for handling URL operations."""
	18
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	19	import os
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	20	import re
	21	import sys
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	22
	23	from bzrlib.lazy_import import lazy_import
	24	lazy_import(globals(), """
	25	from posixpath import split as _posix_split, normpath as _posix_normpath
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	26	import urllib
3242.3.26 by Aaron Bentley Implement rebase_url	27	import urlparse
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	28
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	29	from bzrlib import (
	30	errors,
	31	osutils,
	32	)
	33	""")
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	34
	35
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect.
    :param exclude_trailing_slash: When true, a URL that looks like
        "path/to/foo/" yields 'foo' rather than ''.
    :return: The last component of the URL, i.e. the second element of
        split(url).  This can be '' when the trailing slash is not
        excluded, or when the URL is at its root.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
	47
	48
	49	def dirname(url, exclude_trailing_slash=True):
	50	"""Return the parent directory of the given path.
	51
	52	:param url: Relative or absolute URL
	53	:param exclude_trailing_slash: Remove a final slash
	54	(treat http://host/foo/ as http://host/foo, but
	55	http://host/ stays http://host/)
	56	:return: Everything in the URL except the last path chunk
	57	"""
	58	# TODO: jam 20060502 This was named dirname to be consistent
	59	# with the os functions, but maybe "parent" would be better
	60	return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
	61
	62
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	63	def escape(relpath):
	64	"""Escape relpath to be a valid url."""
	65	if isinstance(relpath, unicode):
	66	relpath = relpath.encode('utf-8')
	67	# After quoting and encoding, the path should be perfectly
	68	# safe as a plain ASCII string, str() just enforces this
4098.3.1 by Jonathan Lange Don't escape tildes	69	return str(urllib.quote(relpath, safe='/~'))
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	70
	71
def file_relpath(base, path):
    """Compute just the relative sub-portion of a url.

    This assumes that both paths are already fully specified file:// URLs.

    :param base: The file:// URL to be relative to.
    :param path: The file:// URL to express relative to base.
    :return: The escaped relative path, as computed by osutils.relpath on
        the two local paths.
    :raises ValueError: If base is shorter than MIN_ABS_FILEURL_LENGTH.
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base (%r) must equal or'
                   ' exceed the platform minimum url length (which is %d)')
        raise ValueError(message % (base, MIN_ABS_FILEURL_LENGTH))
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return escape(osutils.relpath(local_base, local_path))
1685.1.46 by John Arbash Meinel Sorting functions by name.	84
1685.1.46 by John Arbash Meinel Sorting functions by name.	85
def _find_scheme_and_separator(url):
    """Locate the end of the scheme and the first path separator in url.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse.

    :param url: The string to examine.
    :return: A 2-tuple:
        (None, None) when url does not match _url_scheme_re;
        (len(scheme), None) when there is a scheme but no '/' after the
        scheme separator;
        (len(scheme), index) otherwise, where index is the offset in url of
        the first '/' following the scheme separator.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))
    # Offset of the first slash, relative to the start of the 'path' group.
    slash_in_path = match.group('path').find('/')
    if slash_in_path < 0:
        return scheme_length, None
    return scheme_length, match.start('path') + slash_in_path
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	105
	106
def is_url(url):
    """Tell whether a string looks like a URL.

    :param url: The string to test.
    :return: True when url matches _url_scheme_re, False otherwise.
    """
    # A successful match object is always truthy; no match gives None.
    return bool(_url_scheme_re.match(url))
	110
	111
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The URL (or relative path) to start from.
    :param args: Further sections to append, in order.
    :return: The joined URL; base itself when no args are given.
    """
    if not args:
        return base

    def locate_path(url):
        """Return (scheme_end, offset at which the path part of url starts)."""
        found_scheme, found_slash = _find_scheme_and_separator(url)
        if found_slash is None:
            if found_scheme is None:
                # No scheme at all: the whole string is a path.
                found_slash = 0
            else:
                # Scheme but no path separator: the path is empty.
                found_slash = len(url)
        return found_scheme, found_slash

    _base_scheme, prefix_length = locate_path(base)
    current_path = base[prefix_length:]
    for section in args:
        section_scheme, section_path_start = locate_path(section)
        if section_scheme is None:
            current_path = joinpath(current_path, section)
        else:
            # An absolute URL replaces everything accumulated so far.
            base = section
            prefix_length = section_path_start
            current_path = section[section_path_start:]
    return base[:prefix_length] + current_path
1685.1.55 by John Arbash Meinel Adding bzrlib.urlutils.join() to handle joining URLs	146
	147
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The path to start from.  A single trailing '/' is ignored.
    :param args: Path sections to apply in order.  A section starting with
        '/' replaces everything accumulated so far; '.' chunks are skipped
        and '..' chunks remove the previous chunk.
    :return: The joined path; '/' when only the root is left.
    :raises errors.InvalidURLJoin: If a '..' chunk would climb above the
        root, or above the first segment of a relative base.
    """
    path = base.split('/')
    if len(path) > 1 and path[-1] == '':
        # If the path ends in a trailing /, remove it.
        path.pop()
    for arg in args:
        if arg.startswith('/'):
            path = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            elif chunk == '..':
                # [''] is the root of an absolute path; [] means a relative
                # path has already been fully consumed.  Neither can go up,
                # and popping an empty list would raise a bare IndexError.
                if not path or path == ['']:
                    raise errors.InvalidURLJoin('Cannot go above root',
                            base, args)
                path.pop()
            else:
                path.append(chunk)
    if path == ['']:
        return '/'
    else:
        return '/'.join(path)
	179
	180
1685.1.46 by John Arbash Meinel Sorting functions by name.	181	# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
	182	def _posix_local_path_from_url(url):
	183	"""Convert a url like file:///path/to/foo into /path/to/foo"""
4828.1.1 by Michael Hudson test and fix	184	file_localhost_prefix = 'file://localhost/'
	185	if url.startswith(file_localhost_prefix):
	186	path = url[len(file_localhost_prefix) - 1:]
	187	elif not url.startswith('file:///'):
	188	raise errors.InvalidURL(
	189	url, 'local urls must start with file:/// or file://localhost/')
	190	else:
	191	path = url[len('file://'):]
1685.1.46 by John Arbash Meinel Sorting functions by name.	192	# We only strip off 2 slashes
4828.1.1 by Michael Hudson test and fix	193	return unescape(path)
1685.1.46 by John Arbash Meinel Sorting functions by name.	194
	195
	196	def _posix_local_path_to_url(path):
	197	"""Convert a local path like ./foo into a URL like file:///path/to/foo
	198
	199	This also handles transforming escaping unicode characters, etc.
	200	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	201	# importing directly from posixpath allows us to test this
1685.1.46 by John Arbash Meinel Sorting functions by name.	202	# on non-posix platforms
1711.4.5 by John Arbash Meinel the _posix_* routines should use posixpath not os.path, so tests pass on win32	203	return 'file://' + escape(_posix_normpath(
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	204	osutils._posix_abspath(path)))
1685.1.46 by John Arbash Meinel Sorting functions by name.	205
	206
	207	def _win32_local_path_from_url(url):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	208	"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	209	if not url.startswith('file://'):
	210	raise errors.InvalidURL(url, 'local urls must start with file:///, '
	211	'UNC path urls must start with file://')
1685.1.46 by John Arbash Meinel Sorting functions by name.	212	# We strip off all 3 slashes
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	213	win32_url = url[len('file:'):]
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	214	# check for UNC path: //HOST/path
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	215	if not win32_url.startswith('///'):
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	216	if (win32_url[2] == '/'
	217	or win32_url[3] in '\|:'):
	218	raise errors.InvalidURL(url, 'Win32 UNC path urls'
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	219	' have form file://HOST/path')
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	220	return unescape(win32_url)
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	221
	222	# allow empty paths so we can serve all roots
	223	if win32_url == '///':
	224	return '/'
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	225
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	226	# usual local path with drive letter
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	227	if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	228	'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	229	or win32_url[4] not in '\|:'
	230	or win32_url[5] != '/'):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	231	raise errors.InvalidURL(url, 'Win32 file urls start with'
1711.4.8 by John Arbash Meinel switch to prefering lowercase drive letters, since that matches os.getcwd() drive letters	232	' file:///x:/, where x is a valid drive letter')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	233	return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46 by John Arbash Meinel Sorting functions by name.	234
	235
	236	def _win32_local_path_to_url(path):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	237	"""Convert a local path like ./foo into a URL like file:///C:/path/to/foo
1685.1.46 by John Arbash Meinel Sorting functions by name.	238
	239	This also handles transforming escaping unicode characters, etc.
	240	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	241	# importing directly from ntpath allows us to test this
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	242	# on non-win32 platform
	243	# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
	244	# which actually strips trailing space characters.
5278.1.5 by Martin Pool Correct more sloppy use of the term 'Linux'	245	# The worst part is that on linux ntpath.abspath has different
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	246	# semantics, since 'nt' is not an available module.
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	247	if path == '/':
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	248	return 'file:///'
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	249
2279.4.2 by Alexander Belchenko Don't do normpath after abspath, because this function is called inside abspath	250	win32_path = osutils._win32_abspath(path)
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	251	# check for UNC path \\HOST\path
	252	if win32_path.startswith('//'):
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	253	return 'file:' + escape(win32_path)
3234.3.1 by Alexander Belchenko ensure that local_path_to_url() always returns plain string, not unicode.	254	return ('file:///' + str(win32_path[0].upper()) + ':' +
	255	escape(win32_path[2:]))
1685.1.46 by John Arbash Meinel Sorting functions by name.	256
	257
	258	local_path_to_url = _posix_local_path_to_url
	259	local_path_from_url = _posix_local_path_from_url
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	260	MIN_ABS_FILEURL_LENGTH = len('file:///')
1711.4.17 by John Arbash Meinel [merge] bzr.dev 1790	261	WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')
1685.1.46 by John Arbash Meinel Sorting functions by name.	262
	263	if sys.platform == 'win32':
	264	local_path_to_url = _win32_local_path_to_url
	265	local_path_from_url = _win32_local_path_from_url
	266
1711.2.44 by John Arbash Meinel Factor out another win32 special case and add platform independent tests for it.	267	MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	268
	269
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	270	_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	271	_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
	272
	273
	274	def _unescape_safe_chars(matchobj):
	275	"""re.sub callback to convert hex-escapes to plain characters (if safe).
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	276
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	277	e.g. '%7E' will be converted to '~'.
	278	"""
	279	hex_digits = matchobj.group(0)[1:]
	280	char = chr(int(hex_digits, 16))
	281	if char in _url_dont_escape_characters:
	282	return char
	283	else:
	284	return matchobj.group(0).upper()
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	285
	286
	287	def normalize_url(url):
	288	"""Make sure that a path string is in fully normalized URL form.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	289
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	290	This handles URLs which have unicode characters, spaces,
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	291	special characters, etc.
	292
	293	It has two basic modes of operation, depending on whether the
	294	supplied string starts with a url specifier (scheme://) or not.
	295	If it does not have a specifier it is considered a local path,
	296	and will be converted into a file:/// url. Non-ascii characters
	297	will be encoded using utf-8.
	298	If it does have a url specifier, it will be treated as a "hybrid"
	299	URL. Basically, a URL that should have URL special characters already
	300	escaped (like +?&# etc), but may have unicode characters, etc
	301	which would not be valid in a real URL.
	302
	303	:param url: Either a hybrid URL or a local path
	304	:return: A normalized URL which only includes 7-bit ASCII characters.
	305	"""
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	306	scheme_end, path_start = _find_scheme_and_separator(url)
	307	if scheme_end is None:
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	308	return local_path_to_url(url)
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	309	prefix = url[:path_start]
	310	path = url[path_start:]
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	311	if not isinstance(url, unicode):
	312	for c in url:
	313	if c not in _url_safe_characters:
1685.1.53 by John Arbash Meinel Updated normalize_url	314	raise errors.InvalidURL(url, 'URLs can only contain specific'
1685.1.53 by John Arbash Meinel Updated normalize_url	315	' safe characters (not %r)' % c)
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	316	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	317	return str(prefix + ''.join(path))
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	318
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	319	# We have a unicode (hybrid) url
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	320	path_chars = list(path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	321
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	322	for i in xrange(len(path_chars)):
	323	if path_chars[i] not in _url_safe_characters:
	324	chars = path_chars[i].encode('utf-8')
	325	path_chars[i] = ''.join(
	326	['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
	327	path = ''.join(path_chars)
	328	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	329	return str(prefix + path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	330
	331
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, return other. Else return a relative path.
    This assumes no symlinks as part of the url.

    :param base: The URL the result is relative to.
    :param other: The URL to express relative to base.
    :return: other unchanged when either URL has no path separator, when
        the parts before the first path slash differ, or (on win32 file
        URLs) when the drives differ; otherwise a '/'-joined relative
        path, or '.' when both refer to the same location.
    """
    _base_scheme_end, base_slash = _find_scheme_and_separator(base)
    if base_slash is None:
        return other

    _other_scheme_end, other_slash = _find_scheme_and_separator(other)
    if other_slash is None:
        return other

    # Everything before the first path slash (scheme and host) must agree.
    base_prefix = base[:base_slash]
    if base_prefix != other[:other_slash]:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # A relative path cannot cross drives.
        base_drive = base[base_slash+1:base_slash+3]
        other_drive = other[other_slash+1:other_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_slash+1:]
    other_path = other[other_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]

    # An empty path has no sections at all (rather than one empty section).
    base_sections = []
    if base_path != '':
        base_sections = base_path.split('/')
    other_sections = []
    if other_path != '':
        other_sections = other_path.split('/')

    # Count the leading sections the two paths share.
    shared = 0
    for base_section, other_section in zip(base_sections, other_sections):
        if base_section != other_section:
            break
        shared += 1

    # Climb out of what is left of base, then descend into other.
    steps = ['..'] * (len(base_sections) - shared)
    steps.extend(other_sections[shared:])
    return "/".join(steps) or "."
	382
	383
1711.2.43 by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms.	384	def _win32_extract_drive_letter(url_base, path):
	385	"""On win32 the drive letter needs to be added to the url base."""
	386	# Strip off the drive letter
	387	# path is currently /C:/foo
	388	if len(path) < 3 or path[2] not in ':\|' or path[3] != '/':
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	389	raise errors.InvalidURL(url_base + path,
1711.2.43 by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms.	390	'win32 file:/// paths need a drive letter')
	391	url_base += path[0:3] # file:// + /C:
	392	path = path[3:] # /foo
	393	return url_base, path
	394
	395
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string if
        we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        if scheme_loc is not None:
            # Scheme with no path
            return url, ''
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path
    url_base = url[:first_path_slash]   # http://host, file://
    path = url[first_path_slash:]       # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter out of the path:
        # url_base 'file://' and path '/C:/foo' become
        # 'file:///C:' and '/foo'
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # Never strip the slash that is the whole path (the root).
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent_path, child = _posix_split(path)
    return url_base + parent_path, child
	434
1685.1.46 by John Arbash Meinel Sorting functions by name.	435
def split_segment_parameters_raw(url):
    """Split the subsegments off the last segment of a URL.

    Subsegments are the comma-separated pieces that follow the name in the
    last path segment.

    :param url: A relative or absolute URL
    :return: (url, subsegments).  When the last segment contains no comma,
        url is returned unchanged together with an empty list.
    """
    parent_url, last_segment = split(url)
    pieces = last_segment.split(",")
    segment_name = pieces[0]
    subsegments = pieces[1:]
    if not subsegments:
        return (url, [])
    return (join(parent_url, segment_name), subsegments)
	447
	448
def split_segment_parameters(url):
    """Split the segment parameters of the last segment of a URL.

    Each subsegment is split at its first '=' into a key and a value.

    :param url: A relative or absolute URL
    :return: (url, segment_parameters), where segment_parameters is a
        dictionary mapping each key to its value.
    """
    base_url, raw_parameters = split_segment_parameters_raw(url)
    segment_parameters = {}
    for raw_parameter in raw_parameters:
        name, setting = raw_parameter.split("=", 1)
        segment_parameters[name] = setting
    return (base_url, segment_parameters)
	461
	462
def join_segment_parameters_raw(base, *subsegments):
    """Create a new URL by adding subsegments to an existing one.

    This adds the specified subsegments to the last path in the specified
    base URL. The subsegments should be bytestrings.

    :note: You probably want to use join_segment_parameters instead.

    :param base: The URL to extend.
    :param subsegments: Bytestrings to append, each separated by ','.
    :return: base followed by ',' and each subsegment; base itself when no
        subsegments are given.
    :raises TypeError: If a subsegment is not a bytestring.
    :raises errors.InvalidURLJoin: If a subsegment contains ','.
    """
    if not subsegments:
        return base
    for subsegment in subsegments:
        # isinstance rather than an exact type comparison, so that str
        # subclasses are accepted as well.
        if not isinstance(subsegment, str):
            raise TypeError("Subsegment %r is not a bytestring" % subsegment)
        if "," in subsegment:
            raise errors.InvalidURLJoin(", exists in subsegments",
                                        base, subsegments)
    return ",".join((base,) + subsegments)
	480
	481
def join_segment_parameters(url, parameters):
    """Create a new URL by adding segment parameters to an existing one.

    The parameters of the last segment in the URL will be updated; if a
    parameter with the same key already exists it will be overwritten.
    The resulting parameters are written out sorted by key.

    :param url: A URL, as string
    :param parameters: Dictionary of parameters, keys and values as bytestrings
    :return: The URL with the merged parameters appended to its last segment.
    :raises TypeError: If a key or a value is not a bytestring.
    :raises errors.InvalidURLJoin: If a key contains '='.
    """
    (base, existing_parameters) = split_segment_parameters(url)
    new_parameters = {}
    new_parameters.update(existing_parameters)
    for key, value in parameters.items():
        # isinstance rather than an exact type comparison, so that str
        # subclasses are accepted as well.
        if not isinstance(key, str):
            raise TypeError("parameter key %r is not a bytestring" % key)
        if not isinstance(value, str):
            # The arguments follow the order of the placeholders: the
            # offending value first, then the key it belongs to.
            raise TypeError("parameter value %r for %s is not a bytestring" %
                (value, key))
        if "=" in key:
            raise errors.InvalidURLJoin("= exists in parameter key", url,
                parameters)
        new_parameters[key] = value
    return join_segment_parameters_raw(base,
        *["%s=%s" % item for item in sorted(new_parameters.items())])
5163.2.3 by Jelmer Vernooij Add join_segment_parameters / split_segment_parameters.	506
	507
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless url is too short to shorten.

    :param url: A URL; strip_trailing_slash only passes URLs ending in '/'.
    :return: url without its final character when it is longer than
        WIN32_MIN_ABS_FILEURL_LENGTH, otherwise url unchanged.
    """
    # NOTE(review): presumably the constant is the length of a bare
    # drive-root URL, so the slash after the drive letter survives -- confirm
    # against its definition.
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
	514
	515
def strip_trailing_slash(url):
    """Remove one trailing slash from url, leaving root paths intact.

    What counts as a root path depends on the platform. URLs with a scheme
    are assumed to be netloc URLs of the form::

        scheme://host/path

    and the slash that starts the path is never removed. Relative paths
    (no scheme) simply lose their final slash.

    Examples::

        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    On win32, file:// URLs are handled separately by
    _win32_strip_local_trailing_slash.

    :param url: The URL or relative path to trim.
    :return: url with at most one trailing '/' removed.
    """
    if not url.endswith('/'):
        # No trailing slash, nothing to strip
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    has_scheme = scheme_loc is not None
    if has_scheme and (first_path_slash is None
                       or first_path_slash == len(url) - 1):
        # The trailing slash is the one that separates the host from the
        # path (or there is no path slash at all): keep the URL as it is.
        return url
    # Either a relative path or a URL with a non-empty path: drop the slash.
    return url[:-1]
	555
1685.1.47 by John Arbash Meinel s comes before u	556
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	557	def unescape(url):
	558	"""Unescape relpath from url format.
	559
	560	This returns a Unicode path from a URL
	561	"""
	562	# jam 20060427 URLs are supposed to be ASCII only strings
	563	# If they are passed in as unicode, urllib.unquote
	564	# will return a UNICODE string, which actually contains
	565	# utf-8 bytes. So we have to ensure that they are
	566	# plain ASCII strings, or the final .decode will
	567	# try to encode the UNICODE => ASCII, and then decode
	568	# it into utf-8.
	569	try:
	570	url = str(url)
	571	except UnicodeError, e:
	572	raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
1685.1.80 by Wouter van Heyst more code cleanup	573
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	574	unquoted = urllib.unquote(url)
	575	try:
	576	unicode_path = unquoted.decode('utf-8')
	577	except UnicodeError, e:
	578	raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
	579	return unicode_path
	580
	581
	582	# These are characters that if escaped, should stay that way
	583	_no_decode_chars = ';/?:@&=+$,#'
	584	_no_decode_ords = [ord(c) for c in _no_decode_chars]
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	585	_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	586	+ ['%02X' % o for o in _no_decode_ords])
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	587	_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
	588	+ [('%02X' % o, chr(o)) for o in range(256)]))
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	589	#These entries get mapped to themselves
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	590	_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	591
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	592	# These characters shouldn't be percent-encoded, and it's always safe to
	593	# unencode them if they are.
	594	_url_dont_escape_characters = set(
	595	"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
	596	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
	597	"0123456789" # Numbers
	598	"-._~" # Unreserved characters
	599	)
	600
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	601	# These characters should not be escaped
2167.2.2 by Aaron Bentley Update safe character list	602	_url_safe_characters = set(
	603	"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
	604	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
	605	"0123456789" # Numbers
	606	"_.-!~*'()" # Unreserved characters
	607	"/;?:@&=+$," # Reserved characters
	608	"%#" # Extra reserved characters
	609	)
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	610
def unescape_for_display(url, encoding):
    """Undo as much URL escaping as is safe, to get a readable location.

    file:// URLs are converted to local paths. For any other URL each
    '/'-separated segment after the first is unescaped on its own; a segment
    is only replaced when the unescaped bytes are valid UTF-8 and the result
    can be represented in the requested encoding. Escapes listed in
    _no_decode_hex, and anything that is not a recognised two-digit hex
    escape, are left as they are.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding
    :return: A unicode string which can be safely encoded into the
        specified encoding.
    :raises ValueError: If encoding is None.
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only checking that the path is representable; the encoded
            # bytes themselves are not needed.
            path.encode(encoding)
            return path
        except UnicodeError:
            return url

    # Work segment by segment so one undecodable segment does not prevent
    # the others from being shown unescaped.
    segments = url.split('/')
    for seg_index in xrange(1, len(segments)):
        pieces = segments[seg_index].split('%')
        # pieces[0] precedes the first '%'; every later piece starts with
        # what should be the two hex digits of an escape.
        for piece_index in xrange(1, len(pieces)):
            piece = pieces[piece_index]
            hex_code = piece[:2]
            remainder = piece[2:]
            try:
                pieces[piece_index] = _hex_display_map[hex_code] + remainder
            except KeyError:
                # Not a known escape: restore the percent sign
                pieces[piece_index] = '%' + piece
            except UnicodeDecodeError:
                pieces[piece_index] = unichr(int(hex_code, 16)) + remainder
        unescaped = ''.join(pieces)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # The unescaped bytes are not UTF-8: keep the escaped segment
            continue
        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # Not representable in the display encoding: keep the escaped
            # segment
            continue
        segments[seg_index] = decoded
    return u'/'.join(segments)
2512.4.1 by Ian Clatworthy Fixes #115491 - 'branch lp:projname' now creates ./projname as exected	667
	668
	669	def derive_to_location(from_location):
	670	"""Derive a TO_LOCATION given a FROM_LOCATION.
	671
	672	The normal case is a FROM_LOCATION of http://foo/bar => bar.
	673	The Right Thing for some logical destinations may differ though
	674	because no / may be present at all. In that case, the result is
	675	the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar.
	676	This latter case also applies when a Windows drive
	677	is used without a path, e.g. c:foo-bar => foo-bar.
	678	If no /, path separator or : is found, the from_location is returned.
	679	"""
	680	if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
	681	return os.path.basename(from_location.rstrip("/\\"))
	682	else:
	683	sep = from_location.find(":")
	684	if sep > 0:
	685	return from_location[sep+1:]
	686	else:
	687	return from_location
3242.3.26 by Aaron Bentley Implement rebase_url	688
3242.3.35 by Aaron Bentley Cleanups and documentation	689
def _is_absolute(url):
    """Report whether joining url onto '/foo' gives back url itself.

    NOTE(review): this presumably relies on osutils.pathjoin discarding the
    leading component when the second one is absolute -- confirm in osutils.
    """
    joined = osutils.pathjoin('/foo', url)
    return joined == url
	692
3242.3.35 by Aaron Bentley Cleanups and documentation	693
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as relative to new_base.

    :param url: The path to convert. If it has a scheme or is an absolute
        path it is returned unaltered.
    :param old_base: The base URL that url is currently relative to.
    :param new_base: The base URL the result should be relative to.
    :return: A relative path, or url itself in the unaltered cases above.
    :raises InvalidRebaseURLs: If old_base and new_base do not share the
        same scheme and network location.
    """
    scheme, separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url
    old_scheme, old_netloc, old_path = urlparse.urlparse(old_base)[:3]
    new_scheme, new_netloc, new_path = urlparse.urlparse(new_base)[:3]
    if (old_scheme, old_netloc) != (new_scheme, new_netloc):
        # A relative path cannot lead from one server or scheme to another
        raise errors.InvalidRebaseURLs(old_base, new_base)
    # Resolve url against the old base first, then describe that location
    # as seen from the new base.
    return determine_relative_path(new_path, join(old_path, url))
3242.3.26 by Aaron Bentley Implement rebase_url	711
	712
	713	def determine_relative_path(from_path, to_path):
	714	"""Determine a relative path from from_path to to_path."""
	715	from_segments = osutils.splitpath(from_path)
	716	to_segments = osutils.splitpath(to_path)
	717	count = -1
	718	for count, (from_element, to_element) in enumerate(zip(from_segments,
	719	to_segments)):
	720	if from_element != to_element:
	721	break
	722	else:
	723	count += 1
	724	unique_from = from_segments[count:]
	725	unique_to = to_segments[count:]
	726	segments = (['..'] * len(unique_from) + unique_to)
	727	if len(segments) == 0:
	728	return '.'
	729	return osutils.pathjoin(*segments)
3873.3.1 by Martin Pool Move Transport._split_url to urlutils, and ad a simple test	730
	731
	732
	733	def parse_url(url):
	734	"""Extract the server address, the credentials and the path from the url.
	735
	736	user, password, host and path should be quoted if they contain reserved
	737	chars.
	738
	739	:param url: an quoted url
	740
	741	:return: (scheme, user, password, host, port, path) tuple, all fields
	742	are unquoted.
	743	"""
	744	if isinstance(url, unicode):
	745	raise errors.InvalidURL('should be ascii:\n%r' % url)
	746	url = url.encode('utf-8')
	747	(scheme, netloc, path, params,
	748	query, fragment) = urlparse.urlparse(url, allow_fragments=False)
	749	user = password = host = port = None
	750	if '@' in netloc:
	751	user, host = netloc.rsplit('@', 1)
	752	if ':' in user:
	753	user, password = user.split(':', 1)
	754	password = urllib.unquote(password)
	755	user = urllib.unquote(user)
	756	else:
	757	host = netloc
	758
4253.4.2 by Jelmer Vernooij Still parse port in case of ipv6.	759	if ':' in host and not (host[0] == '[' and host[-1] == ']'): #there is port
	760	host, port = host.rsplit(':',1)
	761	try:
	762	port = int(port)
	763	except ValueError:
	764	raise errors.InvalidURL('invalid port number %s in url:\n%s' %
	765	(port, url))
4253.4.3 by Jelmer Vernooij Support empty host name.	766	if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
4253.4.2 by Jelmer Vernooij Still parse port in case of ipv6.	767	host = host[1:-1]
3873.3.2 by Martin Pool Accept ipv6 literals in URLs	768
3873.3.1 by Martin Pool Move Transport._split_url to urlutils, and ad a simple test	769	host = urllib.unquote(host)
	770	path = urllib.unquote(path)
	771
	772	return (scheme, user, password, host, port, path)