~bzr-pqm/bzr/bzr.dev : contents of bzrlib/urlutils.py at revision 5672.1.7

~bzr-pqm/bzr/bzr.dev : (revision 5672.1.7)

4763.2.4 by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry.	1	# Copyright (C) 2006-2010 Canonical Ltd
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	16
	17	"""A collection of function for handling URL operations."""
	18
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	19	import os
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	20	import re
	21	import sys
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	22
	23	from bzrlib.lazy_import import lazy_import
	24	lazy_import(globals(), """
	25	from posixpath import split as _posix_split, normpath as _posix_normpath
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	26	import urllib
3242.3.26 by Aaron Bentley Implement rebase_url	27	import urlparse
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	28
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	29	from bzrlib import (
	30	errors,
	31	osutils,
	32	)
	33	""")
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	34
	35
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect
    :param exclude_trailing_slash: When true, a url shaped like
        "path/to/foo/" has its final slash ignored, so 'foo' is returned
        rather than ''
    :return: Only the last component of the URL, i.e. the second element
        of what split() returns. This can be '' if the trailing slash is
        not excluded, or if the URL is at its root.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
	47
	48
	49	def dirname(url, exclude_trailing_slash=True):
	50	"""Return the parent directory of the given path.
	51
	52	:param url: Relative or absolute URL
	53	:param exclude_trailing_slash: Remove a final slash
	54	(treat http://host/foo/ as http://host/foo, but
	55	http://host/ stays http://host/)
	56	:return: Everything in the URL except the last path chunk
	57	"""
	58	# TODO: jam 20060502 This was named dirname to be consistent
	59	# with the os functions, but maybe "parent" would be better
	60	return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
	61
	62
def escape(relpath):
    """Percent-quote relpath so that it is valid inside a URL.

    Unicode input is encoded as UTF-8 before quoting. '/' and '~' are
    left unquoted.

    :param relpath: A relative path, as a str or unicode object
    :return: The quoted path as a plain str
    """
    raw = relpath
    if isinstance(raw, unicode):
        raw = raw.encode('utf-8')
    quoted = urllib.quote(raw, safe='/~')
    # Once encoded and quoted the result should be pure ASCII; passing it
    # through str() simply enforces that.
    return str(quoted)
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	70
	71
def file_relpath(base, path):
    """Return the escaped path of one file:// URL relative to another.

    Both arguments are assumed to be fully specified file:// URLs already.

    :param base: The file:// URL to compute the result relative to
    :param path: The file:// URL to express relative to base
    :return: The relative portion, escaped for use in a URL
    :raises ValueError: if base is shorter than MIN_ABS_FILEURL_LENGTH
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base (%r) must equal or'
            ' exceed the platform minimum url length (which is %d)' %
            (base, MIN_ABS_FILEURL_LENGTH))
        raise ValueError(message)
    # Work on local filesystem paths, then escape the result again.
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    relative = osutils.relpath(local_base, local_path)
    return escape(relative)
1685.1.46 by John Arbash Meinel Sorting functions by name.	84
1685.1.46 by John Arbash Meinel Sorting functions by name.	85
def _find_scheme_and_separator(url):
    """Locate the end of the scheme and the first slash of the path.

    This is only a helper for the other path utilities in this module.
    It could probably be replaced by urlparse.

    :param url: The string to examine
    :return: (None, None) when url does not match _url_scheme_re;
        (len(scheme), None) when the part after the scheme contains no
        '/'; otherwise (len(scheme), index_in_url_of_that_first_slash)
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))

    # The separating slash is the first '/' found in the 'path' group,
    # i.e. the first one after the scheme marker.
    slash_in_path = match.group('path').find('/')
    if slash_in_path == -1:
        return scheme_length, None
    # Translate the offset within the group into an offset within url.
    return scheme_length, slash_in_path + match.start('path')
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	105
	106
def is_url(url):
    """Report whether the given string matches the URL scheme pattern.

    :param url: The string to test
    :return: True if _url_scheme_re matches url, False otherwise
    """
    if _url_scheme_re.match(url) is None:
        return False
    return True
	110
	111
def join(base, *args):
    """Build a URL by joining sections onto a base.

    '..' is normalized on the assumption that paths are absolute
    (and that neither path involves symlinks).

    Any of *args that is itself an absolute URL replaces everything
    joined so far.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or path
    :param args: Further sections to join, in order
    :return: The joined URL; base itself when no args are given
    """
    if not args:
        return base

    def locate_path(url):
        """Return (scheme_end, path_start) with path_start never None."""
        found_scheme, found_path = _find_scheme_and_separator(url)
        if found_path is None:
            if found_scheme is None:
                # No scheme at all: the whole string is the path.
                found_path = 0
            else:
                # Scheme but no path slash: the path is empty.
                found_path = len(url)
        return found_scheme, found_path

    prefix_source = base
    _scheme_end, path_start = locate_path(base)
    path = base[path_start:]
    for arg in args:
        arg_scheme_end, arg_path_start = locate_path(arg)
        if arg_scheme_end is None:
            # A plain path section: fold it into the current path.
            path = joinpath(path, arg)
            continue
        # A section carrying its own scheme restarts the join from there.
        prefix_source = arg
        path_start = arg_path_start
        path = arg[arg_path_start:]
    return prefix_source[:path_start] + path
1685.1.55 by John Arbash Meinel Adding bzrlib.urlutils.join() to handle joining URLs	146
	147
def joinpath(base, *args):
    """Join URL path segments onto a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The path to start from
    :param args: Path sections to apply in order; a section starting with
        '/' discards everything accumulated so far
    :return: The combined path, or '/' when only the root is left
    :raises errors.InvalidURLJoin: if a '..' would climb above the root
    """
    segments = base.split('/')
    if len(segments) > 1 and segments[-1] == '':
        # The base ends with a trailing /, which is dropped.
        segments.pop()
    for arg in args:
        if arg.startswith('/'):
            segments = []
        for piece in arg.split('/'):
            if piece == '.':
                continue
            if piece != '..':
                segments.append(piece)
                continue
            if segments == ['']:
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)
            segments.pop()
    if segments == ['']:
        return '/'
    return '/'.join(segments)
	179
	180
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into /path/to/foo.

    :param url: A URL starting with file:/// or file://localhost/
    :return: The unescaped local path
    :raises errors.InvalidURL: if url starts with neither prefix
    """
    localhost_prefix = 'file://localhost/'
    if url.startswith(localhost_prefix):
        # Keep the final slash of the prefix as the leading slash of the
        # path.
        return unescape(url[len(localhost_prefix) - 1:])
    if url.startswith('file:///'):
        # Only 2 of the 3 slashes are stripped off.
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(
        url, 'local urls must start with file:/// or file://localhost/')
1685.1.46 by John Arbash Meinel Sorting functions by name.	194
	195
	196	def _posix_local_path_to_url(path):
	197	"""Convert a local path like ./foo into a URL like file:///path/to/foo
	198
	199	This also handles transforming escaping unicode characters, etc.
	200	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	201	# importing directly from posixpath allows us to test this
1685.1.46 by John Arbash Meinel Sorting functions by name.	202	# on non-posix platforms
1711.4.5 by John Arbash Meinel the _posix_* routines should use posixpath not os.path, so tests pass on win32	203	return 'file://' + escape(_posix_normpath(
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	204	osutils._posix_abspath(path)))
1685.1.46 by John Arbash Meinel Sorting functions by name.	205
	206
	207	def _win32_local_path_from_url(url):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	208	"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	209	if not url.startswith('file://'):
	210	raise errors.InvalidURL(url, 'local urls must start with file:///, '
	211	'UNC path urls must start with file://')
1685.1.46 by John Arbash Meinel Sorting functions by name.	212	# We strip off all 3 slashes
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	213	win32_url = url[len('file:'):]
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	214	# check for UNC path: //HOST/path
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	215	if not win32_url.startswith('///'):
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	216	if (win32_url[2] == '/'
	217	or win32_url[3] in '\|:'):
	218	raise errors.InvalidURL(url, 'Win32 UNC path urls'
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	219	' have form file://HOST/path')
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	220	return unescape(win32_url)
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	221
	222	# allow empty paths so we can serve all roots
	223	if win32_url == '///':
	224	return '/'
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	225
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	226	# usual local path with drive letter
5510.2.3 by Jared Bunting Changed _win32_local_path_from_url to not allow "file:///C:" form.	227	if (len(win32_url) < 6
5510.2.1 by Jared Bunting Modified _win32_local_path_from_url to:	228	or win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
	229	'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	230	or win32_url[4] not in '\|:'
5510.2.3 by Jared Bunting Changed _win32_local_path_from_url to not allow "file:///C:" form.	231	or win32_url[5] != '/'):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	232	raise errors.InvalidURL(url, 'Win32 file urls start with'
1711.4.8 by John Arbash Meinel switch to prefering lowercase drive letters, since that matches os.getcwd() drive letters	233	' file:///x:/, where x is a valid drive letter')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	234	return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46 by John Arbash Meinel Sorting functions by name.	235
	236
	237	def _win32_local_path_to_url(path):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	238	"""Convert a local path like ./foo into a URL like file:///C:/path/to/foo
1685.1.46 by John Arbash Meinel Sorting functions by name.	239
	240	This also handles transforming escaping unicode characters, etc.
	241	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	242	# importing directly from ntpath allows us to test this
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	243	# on non-win32 platform
	244	# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
	245	# which actually strips trailing space characters.
5278.1.5 by Martin Pool Correct more sloppy use of the term 'Linux'	246	# The worst part is that on linux ntpath.abspath has different
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	247	# semantics, since 'nt' is not an available module.
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	248	if path == '/':
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	249	return 'file:///'
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	250
2279.4.2 by Alexander Belchenko Don't do normpath after abspath, because this function is called inside abspath	251	win32_path = osutils._win32_abspath(path)
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	252	# check for UNC path \\HOST\path
	253	if win32_path.startswith('//'):
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	254	return 'file:' + escape(win32_path)
3234.3.1 by Alexander Belchenko ensure that local_path_to_url() always returns plain string, not unicode.	255	return ('file:///' + str(win32_path[0].upper()) + ':' +
	256	escape(win32_path[2:]))
1685.1.46 by John Arbash Meinel Sorting functions by name.	257
	258
	259	local_path_to_url = _posix_local_path_to_url
	260	local_path_from_url = _posix_local_path_from_url
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	261	MIN_ABS_FILEURL_LENGTH = len('file:///')
1711.4.17 by John Arbash Meinel [merge] bzr.dev 1790	262	WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')
1685.1.46 by John Arbash Meinel Sorting functions by name.	263
	264	if sys.platform == 'win32':
	265	local_path_to_url = _win32_local_path_to_url
	266	local_path_from_url = _win32_local_path_from_url
	267
1711.2.44 by John Arbash Meinel Factor out another win32 special case and add platform independent tests for it.	268	MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	269
	270
# Matches a string that begins with a scheme: at least two characters that
# are neither ':' nor '/', followed by ':' and an optional '//'. Whatever
# follows is captured in the 'path' group.
# NOTE(review): the two-character minimum presumably keeps a win32 drive
# letter such as 'C:' from being taken for a scheme -- confirm.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')
# Matches a single percent-escape: '%' followed by two hex digits.
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
	273
	274
	275	def _unescape_safe_chars(matchobj):
	276	"""re.sub callback to convert hex-escapes to plain characters (if safe).
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	277
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	278	e.g. '%7E' will be converted to '~'.
	279	"""
	280	hex_digits = matchobj.group(0)[1:]
	281	char = chr(int(hex_digits, 16))
	282	if char in _url_dont_escape_characters:
	283	return char
	284	else:
	285	return matchobj.group(0).upper()
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	286
	287
	288	def normalize_url(url):
	289	"""Make sure that a path string is in fully normalized URL form.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	290
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	291	This handles URLs which have unicode characters, spaces,
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	292	special characters, etc.
	293
	294	It has two basic modes of operation, depending on whether the
	295	supplied string starts with a url specifier (scheme://) or not.
	296	If it does not have a specifier it is considered a local path,
	297	and will be converted into a file:/// url. Non-ascii characters
	298	will be encoded using utf-8.
	299	If it does have a url specifier, it will be treated as a "hybrid"
	300	URL. Basically, a URL that should have URL special characters already
	301	escaped (like +?&# etc), but may have unicode characters, etc
	302	which would not be valid in a real URL.
	303
	304	:param url: Either a hybrid URL or a local path
	305	:return: A normalized URL which only includes 7-bit ASCII characters.
	306	"""
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	307	scheme_end, path_start = _find_scheme_and_separator(url)
	308	if scheme_end is None:
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	309	return local_path_to_url(url)
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	310	prefix = url[:path_start]
	311	path = url[path_start:]
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	312	if not isinstance(url, unicode):
	313	for c in url:
	314	if c not in _url_safe_characters:
1685.1.53 by John Arbash Meinel Updated normalize_url	315	raise errors.InvalidURL(url, 'URLs can only contain specific'
1685.1.53 by John Arbash Meinel Updated normalize_url	316	' safe characters (not %r)' % c)
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	317	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	318	return str(prefix + ''.join(path))
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	319
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	320	# We have a unicode (hybrid) url
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	321	path_chars = list(path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	322
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	323	for i in xrange(len(path_chars)):
	324	if path_chars[i] not in _url_safe_characters:
	325	chars = path_chars[i].encode('utf-8')
	326	path_chars[i] = ''.join(
	327	['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
	328	path = ''.join(path_chars)
	329	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
5254.1.1 by Gordon Tyler Added support to urlutils for URLs such as Launchpad's lp:foobar.	330	return str(prefix + path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	331
	332
def relative_url(base, other):
    """Return a path leading from base to other.

    When other is unrelated to base, other is returned unchanged;
    otherwise the result is a relative path. No symlinks are assumed to
    be part of the url.

    :param base: The URL to start from
    :param other: The URL to reach
    :return: other itself (when either URL has no path slash, when the
        parts before the path differ, or on win32 when file:// drives
        differ), else a relative path, which is '.' when nothing is left
    """
    _unused, base_slash = _find_scheme_and_separator(base)
    if base_slash is None:
        return other

    _unused, other_slash = _find_scheme_and_separator(other)
    if other_slash is None:
        return other

    # Comparing everything before the path catches differing schemes or
    # hosts.
    base_prefix = base[:base_slash]
    if base_prefix != other[:other_slash]:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # Locations on different drives cannot be related to each other.
        if (base[base_slash + 1:base_slash + 3]
            != other[other_slash + 1:other_slash + 3]):
            return other

    base_path = base[base_slash + 1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    base_parts = base_path.split('/')
    if base_parts == ['']:
        base_parts = []

    other_parts = other[other_slash + 1:].split('/')
    if other_parts == ['']:
        other_parts = []

    # Count the leading sections that both paths have in common.
    shared = 0
    for base_part, other_part in zip(base_parts, other_parts):
        if base_part != other_part:
            break
        shared += 1

    # Climb out of what remains of base, then descend into other.
    climb = ['..'] * (len(base_parts) - shared)
    return "/".join(climb + other_parts[shared:]) or "."
	383
	384
def _win32_extract_drive_letter(url_base, path):
    """On win32 the drive letter needs to be added to the url base.

    :param url_base: The URL up to the path, e.g. 'file://'
    :param path: The path including its drive, e.g. '/C:/foo'
    :return: (url_base, path) with the first three characters of path
        moved onto url_base, e.g. ('file:///C:', '/foo')
    :raises errors.InvalidURL: if path does not have a drive marker at
        index 2 followed by '/' at index 3
    """
    # Strip off the drive letter
    # path is currently /C:/foo
    # path[3] is read below, so at least 4 characters are required; a
    # shorter path such as '/C:' must be rejected here rather than fail
    # with an IndexError.
    if len(path) < 4 or path[2] not in ':\|' or path[3] != '/':
        raise errors.InvalidURL(url_base + path,
            'win32 file:/// paths need a drive letter')
    url_base += path[0:3] # file:// + /C:
    path = path[3:] # /foo
    return url_base, path
	395
	396
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None and scheme_loc is not None:
        # A scheme but no path at all: there is nothing to split off.
        return url, ''

    if first_path_slash is None:
        # No scheme either, so this is a relative path.
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # From here on the URL is fully defined.
    url_base = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter over to the base:
        # 'file://' + '/C:/foo' becomes 'file:///C:' + '/foo'
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A path consisting of just '/' keeps its slash.
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent_path, child = _posix_split(path)
    return url_base + parent_path, child
	435
1685.1.46 by John Arbash Meinel Sorting functions by name.	436
def split_segment_parameters_raw(url):
    """Separate the comma-delimited subsegments from a URL's last segment.

    :param url: A relative or absolute URL
    :return: (url, subsegments); the url is returned unchanged with an
        empty list when its last segment contains no ','
    """
    parent_url, last_segment = split(url)
    if "," not in last_segment:
        return (url, [])
    pieces = last_segment.split(",")
    # The first piece is the real segment; the rest are subsegments.
    return (join(parent_url, pieces[0]), pieces[1:])
	448
	449
def split_segment_parameters(url):
    """Separate the segment parameters from the last segment of a URL.

    Each raw subsegment is divided at its first '=' into a key and a
    value.

    :param url: A relative or absolute URL
    :return: (url, segment_parameters), the parameters being a dict
    """
    base_url, raw_parameters = split_segment_parameters_raw(url)
    parsed = {}
    for raw in raw_parameters:
        name, setting = raw.split("=", 1)
        parsed[name] = setting
    return (base_url, parsed)
	462
	463
def join_segment_parameters_raw(base, *subsegments):
    """Build a new URL by appending subsegments to an existing one.

    The given subsegments are added, comma-separated, after the base
    URL. Each subsegment should be a bytestring.

    :note: You probably want to use join_segment_parameters instead.

    :param base: The URL to extend
    :param subsegments: The raw subsegments to append
    :return: base followed by the subsegments; base itself when there
        are none
    :raises TypeError: if a subsegment is not exactly of type str
    :raises errors.InvalidURLJoin: if a subsegment contains a ','
    """
    if len(subsegments) == 0:
        return base
    pieces = [base]
    for subsegment in subsegments:
        if type(subsegment) is not str:
            raise TypeError("Subsegment %r is not a bytestring" % subsegment)
        if "," in subsegment:
            # A comma inside a subsegment would be read back as a
            # separator.
            raise errors.InvalidURLJoin(", exists in subsegments",
                                        base, subsegments)
        pieces.append(subsegment)
    return ",".join(pieces)
	481
	482
def join_segment_parameters(url, parameters):
    """Create a new URL by adding segment parameters to an existing one.

    The parameters of the last segment in the URL will be updated; if a
    parameter with the same key already exists it will be overwritten.

    :param url: A URL, as string
    :param parameters: Dictionary of parameters, keys and values as bytestrings
    :return: The URL with its parameters appended as key=value
        subsegments, sorted by key
    :raises TypeError: if a key or value is not exactly of type str
    :raises errors.InvalidURLJoin: if a key contains '='
    """
    (base, existing_parameters) = split_segment_parameters(url)
    new_parameters = {}
    new_parameters.update(existing_parameters)
    for key, value in parameters.iteritems():
        if type(key) is not str:
            raise TypeError("parameter key %r is not a bytestring" % key)
        if type(value) is not str:
            # The message names the value first and then its key, so the
            # arguments have to be given in that order.
            raise TypeError("parameter value %r for %s is not a bytestring" %
                (value, key))
        if "=" in key:
            # Keys are split from values at the first '=', so a key
            # containing one could not be read back.
            raise errors.InvalidURLJoin("= exists in parameter key", url,
                parameters)
        new_parameters[key] = value
    return join_segment_parameters_raw(base,
        *["%s=%s" % item for item in sorted(new_parameters.items())])
5163.2.3 by Jelmer Vernooij Add join_segment_parameters / split_segment_parameters.	507
	508
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless the url is too short.

    A url no longer than WIN32_MIN_ABS_FILEURL_LENGTH is returned unchanged
    (presumably because it is a bare drive root -- confirm against the
    definition of that constant).  The caller is expected to have checked
    that url ends with a slash.
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
	515
	516
def strip_trailing_slash(url):
    """Remove a single trailing slash, unless it is the root path.

    What counts as a 'root path' is platform-dependent.
    URLs with a scheme are assumed to have the form scheme://host/path;
    the slash that starts the path is never removed.  A string without a
    scheme is treated as a relative path and just loses its last character.

    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo

    On win32, file:// URLs are delegated to
    _win32_strip_local_trailing_slash, which leaves short URLs untouched.

    :param url: A URL or relative path
    :return: url with at most one trailing '/' removed
    """
    if not url.endswith('/'):
        # No trailing slash, so there is nothing to strip.
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_end, path_start = _find_scheme_and_separator(url)
    without_slash = url[:-1]
    if scheme_end is None:
        # No scheme: a relative path, simply chop the last character.
        return without_slash
    if path_start is None or path_start == len(url) - 1:
        # The only slash is the one separating host and path; keep it.
        return url
    return without_slash
	556
1685.1.47 by John Arbash Meinel s comes before u	557
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	558	def unescape(url):
	559	"""Unescape relpath from url format.
	560
	561	This returns a Unicode path from a URL
	562	"""
	563	# jam 20060427 URLs are supposed to be ASCII only strings
	564	# If they are passed in as unicode, urllib.unquote
	565	# will return a UNICODE string, which actually contains
	566	# utf-8 bytes. So we have to ensure that they are
	567	# plain ASCII strings, or the final .decode will
	568	# try to encode the UNICODE => ASCII, and then decode
	569	# it into utf-8.
	570	try:
	571	url = str(url)
	572	except UnicodeError, e:
	573	raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
1685.1.80 by Wouter van Heyst more code cleanup	574
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	575	unquoted = urllib.unquote(url)
	576	try:
	577	unicode_path = unquoted.decode('utf-8')
	578	except UnicodeError, e:
	579	raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
	580	return unicode_path
	581
	582
	583	# These are characters that if escaped, should stay that way
	584	_no_decode_chars = ';/?:@&=+$,#'
	585	_no_decode_ords = [ord(c) for c in _no_decode_chars]
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	586	_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	587	+ ['%02X' % o for o in _no_decode_ords])
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	588	_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
	589	+ [('%02X' % o, chr(o)) for o in range(256)]))
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	590	#These entries get mapped to themselves
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	591	_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	592
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	593	# These characters shouldn't be percent-encoded, and it's always safe to
	594	# unencode them if they are.
	595	_url_dont_escape_characters = set(
	596	"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
	597	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
	598	"0123456789" # Numbers
	599	"-._~" # Unreserved characters
	600	)
	601
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	602	# These characters should not be escaped
2167.2.2 by Aaron Bentley Update safe character list	603	_url_safe_characters = set(
	604	"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
	605	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
	606	"0123456789" # Numbers
	607	"_.-!~*'()" # Unreserved characters
	608	"/;?:@&=+$," # Reserved characters
	609	"%#" # Extra reserved characters
	610	)
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	611
def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    This will turn file:// urls into local paths, and try to decode
    any portions of a http:// style url that it can.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding.
    :raises ValueError: if encoding is None.
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only a check that the path is representable in the target
            # encoding; the encoded result itself is discarded.
            path.encode(encoding)
            return path
        except UnicodeError:
            # Fall back to the untouched URL.
            return url

    # Split into sections to try to decode utf-8
    res = url.split('/')
    # Index 0 (everything before the first '/') is never unescaped.
    for i in xrange(1, len(res)):
        # After splitting on '%', every chunk but the first starts with the
        # two characters that followed a percent sign.
        escaped_chunks = res[i].split('%')
        for j in xrange(1, len(escaped_chunks)):
            item = escaped_chunks[j]
            try:
                # _hex_display_map returns either the decoded character or,
                # for characters that must stay escaped, the escape itself.
                escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
            except KeyError:
                # Put back the percent symbol
                escaped_chunks[j] = '%' + item
            except UnicodeDecodeError:
                # NOTE(review): presumably reached when item is a unicode
                # object and the mapped byte is non-ASCII, so that the
                # str + unicode concatenation above fails -- confirm.
                escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
        unescaped = ''.join(escaped_chunks)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            pass
        else:
            try:
                # As above, this only checks that the segment is
                # representable in the target encoding.
                decoded.encode(encoding)
            except UnicodeEncodeError:
                # If this chunk cannot be encoded in the local
                # encoding, then we should leave it alone
                pass
            else:
                # Otherwise take the url decoded one
                res[i] = decoded
    return u'/'.join(res)
2512.4.1 by Ian Clatworthy Fixes #115491 - 'branch lp:projname' now creates ./projname as exected	668
	669
	670	def derive_to_location(from_location):
	671	"""Derive a TO_LOCATION given a FROM_LOCATION.
	672
	673	The normal case is a FROM_LOCATION of http://foo/bar => bar.
	674	The Right Thing for some logical destinations may differ though
	675	because no / may be present at all. In that case, the result is
	676	the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar.
	677	This latter case also applies when a Windows drive
	678	is used without a path, e.g. c:foo-bar => foo-bar.
	679	If no /, path separator or : is found, the from_location is returned.
	680	"""
	681	if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
	682	return os.path.basename(from_location.rstrip("/\\"))
	683	else:
	684	sep = from_location.find(":")
	685	if sep > 0:
	686	return from_location[sep+1:]
	687	else:
	688	return from_location
3242.3.26 by Aaron Bentley Implement rebase_url	689
3242.3.35 by Aaron Bentley Cleanups and documentation	690
def _is_absolute(url):
    """Return True if joining url onto '/foo' gives back url itself."""
    joined = osutils.pathjoin('/foo', url)
    return joined == url
	693
3242.3.35 by Aaron Bentley Cleanups and documentation	694
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as relative to new_base.

    The result will be a relative path.
    Absolute paths and full URLs are returned unaltered.

    :param url: A relative path, absolute path or full URL
    :param old_base: The base URL that url is currently relative to
    :param new_base: The base URL the result should be relative to
    :return: url unchanged if it has a scheme or is absolute, otherwise
        the equivalent path relative to new_base
    :raises errors.InvalidRebaseURLs: if the two bases differ in scheme or
        network location
    """
    scheme, separator = _find_scheme_and_separator(url)
    # The absoluteness check only runs for urls without a scheme.
    if scheme is not None or _is_absolute(url):
        return url
    old_scheme, old_netloc, old_path = urlparse.urlparse(old_base)[:3]
    new_scheme, new_netloc, new_path = urlparse.urlparse(new_base)[:3]
    if (old_scheme, old_netloc) != (new_scheme, new_netloc):
        raise errors.InvalidRebaseURLs(old_base, new_base)
    target_path = join(old_path, url)
    return determine_relative_path(new_path, target_path)
3242.3.26 by Aaron Bentley Implement rebase_url	712
	713
	714	def determine_relative_path(from_path, to_path):
	715	"""Determine a relative path from from_path to to_path."""
	716	from_segments = osutils.splitpath(from_path)
	717	to_segments = osutils.splitpath(to_path)
	718	count = -1
	719	for count, (from_element, to_element) in enumerate(zip(from_segments,
	720	to_segments)):
	721	if from_element != to_element:
	722	break
	723	else:
	724	count += 1
	725	unique_from = from_segments[count:]
	726	unique_to = to_segments[count:]
	727	segments = (['..'] * len(unique_from) + unique_to)
	728	if len(segments) == 0:
	729	return '.'
	730	return osutils.pathjoin(*segments)
3873.3.1 by Martin Pool Move Transport._split_url to urlutils, and ad a simple test	731
	732
	733
	734	def parse_url(url):
	735	"""Extract the server address, the credentials and the path from the url.
	736
	737	user, password, host and path should be quoted if they contain reserved
	738	chars.
	739
	740	:param url: an quoted url
	741
	742	:return: (scheme, user, password, host, port, path) tuple, all fields
	743	are unquoted.
	744	"""
	745	if isinstance(url, unicode):
	746	raise errors.InvalidURL('should be ascii:\n%r' % url)
	747	url = url.encode('utf-8')
	748	(scheme, netloc, path, params,
	749	query, fragment) = urlparse.urlparse(url, allow_fragments=False)
	750	user = password = host = port = None
	751	if '@' in netloc:
	752	user, host = netloc.rsplit('@', 1)
	753	if ':' in user:
	754	user, password = user.split(':', 1)
	755	password = urllib.unquote(password)
	756	user = urllib.unquote(user)
	757	else:
	758	host = netloc
	759
4253.4.2 by Jelmer Vernooij Still parse port in case of ipv6.	760	if ':' in host and not (host[0] == '[' and host[-1] == ']'): #there is port
	761	host, port = host.rsplit(':',1)
	762	try:
	763	port = int(port)
	764	except ValueError:
	765	raise errors.InvalidURL('invalid port number %s in url:\n%s' %
	766	(port, url))
4253.4.3 by Jelmer Vernooij Support empty host name.	767	if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
4253.4.2 by Jelmer Vernooij Still parse port in case of ipv6.	768	host = host[1:-1]
3873.3.2 by Martin Pool Accept ipv6 literals in URLs	769
3873.3.1 by Martin Pool Move Transport._split_url to urlutils, and ad a simple test	770	host = urllib.unquote(host)
	771	path = urllib.unquote(path)
	772
	773	return (scheme, user, password, host, port, path)