~bzr-pqm/bzr/bzr.dev : contents of bzrlib/urlutils.py at revision 5053.1.1

~bzr-pqm/bzr/bzr.dev : (revision 5053.1.1)

4763.2.4 by John Arbash Meinel merge bzr.2.1 in preparation for NEWS entry.	1	# Copyright (C) 2006-2010 Canonical Ltd
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	16
"""A collection of functions for handling URL operations."""
	18
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	19	import os
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	20	import re
	21	import sys
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	22
	23	from bzrlib.lazy_import import lazy_import
	24	lazy_import(globals(), """
	25	from posixpath import split as _posix_split, normpath as _posix_normpath
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	26	import urllib
3242.3.26 by Aaron Bentley Implement rebase_url	27	import urlparse
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	28
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	29	from bzrlib import (
	30	errors,
	31	osutils,
	32	)
	33	""")
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	34
	35
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect.
    :param exclude_trailing_slash: When true, a URL that looks like
        "path/to/foo/" yields 'foo' rather than ''.
    :return: The last component of the URL. This is '' when the URL is at
        its root, or when it ends in a slash and exclude_trailing_slash is
        false.
    """
    _parent, child = split(url, exclude_trailing_slash=exclude_trailing_slash)
    return child
	47
	48
	49	def dirname(url, exclude_trailing_slash=True):
	50	"""Return the parent directory of the given path.
	51
	52	:param url: Relative or absolute URL
	53	:param exclude_trailing_slash: Remove a final slash
	54	(treat http://host/foo/ as http://host/foo, but
	55	http://host/ stays http://host/)
	56	:return: Everything in the URL except the last path chunk
	57	"""
	58	# TODO: jam 20060502 This was named dirname to be consistent
	59	# with the os functions, but maybe "parent" would be better
	60	return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
	61
	62
def escape(relpath):
    """Percent-quote relpath so that it is valid inside a URL.

    unicode input is encoded as UTF-8 before quoting; '/' and '~' are left
    unquoted.

    :param relpath: The path to quote (str or unicode).
    :return: The quoted path as a plain str.
    """
    raw = relpath.encode('utf-8') if isinstance(relpath, unicode) else relpath
    quoted = urllib.quote(raw, safe='/~')
    # Once encoded and quoted the text should be plain ASCII already;
    # str() merely enforces that.
    return str(quoted)
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	70
	71
def file_relpath(base, path):
    """Compute the escaped relative sub-portion of path under base.

    Both arguments are assumed to be fully specified file:// URLs already.

    :param base: file:// URL of the containing location.
    :param path: file:// URL of the location to express relative to base.
    :return: The result of osutils.relpath() on the two local paths,
        escaped for use in a URL.
    :raises ValueError: if base is shorter than MIN_ABS_FILEURL_LENGTH.
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        message = ('Length of base (%r) must equal or'
                   ' exceed the platform minimum url length (which is %d)')
        raise ValueError(message % (base, MIN_ABS_FILEURL_LENGTH))
    # Convert base first, then path, so that a bad base is reported first.
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return escape(osutils.relpath(local_base, local_path))
1685.1.46 by John Arbash Meinel Sorting functions by name.	84
1685.1.46 by John Arbash Meinel Sorting functions by name.	85
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse.

    :param url: The URL (or relative path) to examine.
    :return: A pair (scheme_end, first_path_slash). Both are None when url
        does not match _url_scheme_re. Otherwise scheme_end is the length of
        the scheme, and first_path_slash is the index in url of the first
        '/' after the '://', or None if there is no such slash.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_end = len(match.group('scheme'))
    # Offset of the first slash within the text that follows '://'
    slash_offset = match.group('path').find('/')
    if slash_offset < 0:
        return scheme_end, None
    # Translate the offset back into an index into the whole url by skipping
    # the scheme and the three characters of '://'.
    return scheme_end, slash_offset + scheme_end + 3
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	105
	106
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it will be treated correctly.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or relative path. It is returned unchanged
        when no further sections are given.
    :param args: Sections to apply in order. A section matching
        _url_scheme_re replaces everything accumulated so far; any other
        section is combined with the current path by joinpath().
    :return: The joined URL as a string.
    :raises errors.InvalidURLJoin: propagated from joinpath() when a '..'
        would climb above the root.
    """
    if not args:
        return base
    match = _url_scheme_re.match(base)
    scheme = None
    if match:
        scheme = match.group('scheme')
        # The regex's 'path' group is everything after '://', so the first
        # element of this list is the host.
        path = match.group('path').split('/')
        if path[-1:] == ['']:
            # Strip off a trailing slash
            # This helps both when we are at the root, and when
            # 'base' has an extra slash at the end
            path = path[:-1]
    else:
        path = base.split('/')

    if scheme is not None and len(path) >= 1:
        # Split the host off the front and keep the rest as an absolute path
        # (a leading '' element stands for the leading '/').
        host = path[:1]
        # the path should be represented as an abs path.
        # we know this must be absolute because of the presence of a URL scheme.
        remove_root = True
        path = [''] + path[1:]
    else:
        # create an empty host, but dont alter the path - this might be a
        # relative url fragment.
        host = []
        remove_root = False

    for arg in args:
        match = _url_scheme_re.match(arg)
        if match:
            # Absolute URL
            scheme = match.group('scheme')
            # this skips .. normalisation, making http://host/../../..
            # be rather strange.
            path = match.group('path').split('/')
            # set the host and path according to new absolute URL, discarding
            # any previous values.
            # XXX: duplicates mess from earlier in this function.  This URL
            # manipulation code needs some cleaning up.
            if scheme is not None and len(path) >= 1:
                host = path[:1]
                path = path[1:]
                # url scheme implies absolute path.
                path = [''] + path
            else:
                # no url scheme we take the path as is.
                host = []
        else:
            # Relative section: round-trip through a '/'-joined string so
            # that joinpath() can resolve '.' and '..' against the path
            # accumulated so far.
            path = '/'.join(path)
            path = joinpath(path, arg)
            path = path.split('/')
    # remove_root is only set when base itself had a scheme; in that case the
    # leading '' (root marker) added above is dropped again.
    if remove_root and path[0:1] == ['']:
        del path[0]
    if host:
        # Remove the leading slash from the path, so long as it isn't also the
        # trailing slash, which we want to keep if present.
        if path and path[0] == '' and len(path) > 1:
            del path[0]
        path = host + path

    if scheme is None:
        return '/'.join(path)
    return scheme + '://' + '/'.join(path)
	182
	183
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The starting URL path. A single trailing '/' is ignored.
    :param args: Further path segments, applied in order. A segment starting
        with '/' discards everything accumulated so far; '.' chunks are
        skipped and '..' chunks remove the previous chunk.
    :return: The combined path; '/' when only the root is left.
    :raises errors.InvalidURLJoin: if a '..' would climb above the root, or
        above the start of a relative path.
    """
    path = base.split('/')
    if len(path) > 1 and path[-1] == '':
        # If the path ends in a trailing /, remove it.
        path.pop()
    for arg in args:
        if arg.startswith('/'):
            # Absolute segment: start over. The leading '' chunk produced by
            # splitting it re-creates the root marker.
            path = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            elif chunk == '..':
                # [''] is the root. An empty list means a relative path has
                # already been climbed back to its start; popping it would
                # leak an IndexError, so report it like the rooted case.
                if not path or path == ['']:
                    raise errors.InvalidURLJoin('Cannot go above root',
                            base, args)
                path.pop()
            else:
                path.append(chunk)
    if path == ['']:
        return '/'
    return '/'.join(path)
	215
	216
1685.1.46 by John Arbash Meinel Sorting functions by name.	217	# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
	218	def _posix_local_path_from_url(url):
	219	"""Convert a url like file:///path/to/foo into /path/to/foo"""
4828.1.1 by Michael Hudson test and fix	220	file_localhost_prefix = 'file://localhost/'
	221	if url.startswith(file_localhost_prefix):
	222	path = url[len(file_localhost_prefix) - 1:]
	223	elif not url.startswith('file:///'):
	224	raise errors.InvalidURL(
	225	url, 'local urls must start with file:/// or file://localhost/')
	226	else:
	227	path = url[len('file://'):]
1685.1.46 by John Arbash Meinel Sorting functions by name.	228	# We only strip off 2 slashes
4828.1.1 by Michael Hudson test and fix	229	return unescape(path)
1685.1.46 by John Arbash Meinel Sorting functions by name.	230
	231
	232	def _posix_local_path_to_url(path):
	233	"""Convert a local path like ./foo into a URL like file:///path/to/foo
	234
	235	This also handles transforming escaping unicode characters, etc.
	236	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	237	# importing directly from posixpath allows us to test this
1685.1.46 by John Arbash Meinel Sorting functions by name.	238	# on non-posix platforms
1711.4.5 by John Arbash Meinel the _posix_* routines should use posixpath not os.path, so tests pass on win32	239	return 'file://' + escape(_posix_normpath(
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	240	osutils._posix_abspath(path)))
1685.1.46 by John Arbash Meinel Sorting functions by name.	241
	242
	243	def _win32_local_path_from_url(url):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	244	"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	245	if not url.startswith('file://'):
	246	raise errors.InvalidURL(url, 'local urls must start with file:///, '
	247	'UNC path urls must start with file://')
1685.1.46 by John Arbash Meinel Sorting functions by name.	248	# We strip off all 3 slashes
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	249	win32_url = url[len('file:'):]
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	250	# check for UNC path: //HOST/path
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	251	if not win32_url.startswith('///'):
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	252	if (win32_url[2] == '/'
	253	or win32_url[3] in '\|:'):
	254	raise errors.InvalidURL(url, 'Win32 UNC path urls'
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	255	' have form file://HOST/path')
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	256	return unescape(win32_url)
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	257
	258	# allow empty paths so we can serve all roots
	259	if win32_url == '///':
	260	return '/'
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	261
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	262	# usual local path with drive letter
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	263	if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	264	'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	265	or win32_url[4] not in '\|:'
	266	or win32_url[5] != '/'):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	267	raise errors.InvalidURL(url, 'Win32 file urls start with'
1711.4.8 by John Arbash Meinel switch to prefering lowercase drive letters, since that matches os.getcwd() drive letters	268	' file:///x:/, where x is a valid drive letter')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	269	return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46 by John Arbash Meinel Sorting functions by name.	270
	271
	272	def _win32_local_path_to_url(path):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	273	"""Convert a local path like ./foo into a URL like file:///C:/path/to/foo
1685.1.46 by John Arbash Meinel Sorting functions by name.	274
	275	This also handles transforming escaping unicode characters, etc.
	276	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	277	# importing directly from ntpath allows us to test this
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	278	# on non-win32 platform
	279	# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
	280	# which actually strips trailing space characters.
	281	# The worst part is that under linux ntpath.abspath has different
	282	# semantics, since 'nt' is not an available module.
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	283	if path == '/':
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	284	return 'file:///'
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	285
2279.4.2 by Alexander Belchenko Don't do normpath after abspath, because this function is called inside abspath	286	win32_path = osutils._win32_abspath(path)
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	287	# check for UNC path \\HOST\path
	288	if win32_path.startswith('//'):
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	289	return 'file:' + escape(win32_path)
3234.3.1 by Alexander Belchenko ensure that local_path_to_url() always returns plain string, not unicode.	290	return ('file:///' + str(win32_path[0].upper()) + ':' +
	291	escape(win32_path[2:]))
1685.1.46 by John Arbash Meinel Sorting functions by name.	292
	293
	294	local_path_to_url = _posix_local_path_to_url
	295	local_path_from_url = _posix_local_path_from_url
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	296	MIN_ABS_FILEURL_LENGTH = len('file:///')
1711.4.17 by John Arbash Meinel [merge] bzr.dev 1790	297	WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')
1685.1.46 by John Arbash Meinel Sorting functions by name.	298
	299	if sys.platform == 'win32':
	300	local_path_to_url = _win32_local_path_to_url
	301	local_path_from_url = _win32_local_path_from_url
	302
1711.2.44 by John Arbash Meinel Factor out another win32 special case and add platform independent tests for it.	303	MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	304
	305
# Matches 'scheme://rest'. The scheme is two or more characters, none of
# them ':' or '/'; everything after '://' (host included) is captured as
# the 'path' group.
# NOTE(review): the two-character minimum presumably keeps a one-letter
# Windows drive prefix from being taken for a scheme - confirm.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
# Matches one percent-escape: '%' followed by two hex digits of either case.
_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
	308
	309
	310	def _unescape_safe_chars(matchobj):
	311	"""re.sub callback to convert hex-escapes to plain characters (if safe).
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	312
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	313	e.g. '%7E' will be converted to '~'.
	314	"""
	315	hex_digits = matchobj.group(0)[1:]
	316	char = chr(int(hex_digits, 16))
	317	if char in _url_dont_escape_characters:
	318	return char
	319	else:
	320	return matchobj.group(0).upper()
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	321
	322
	323	def normalize_url(url):
	324	"""Make sure that a path string is in fully normalized URL form.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	325
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	326	This handles URLs which have unicode characters, spaces,
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	327	special characters, etc.
	328
	329	It has two basic modes of operation, depending on whether the
	330	supplied string starts with a url specifier (scheme://) or not.
	331	If it does not have a specifier it is considered a local path,
	332	and will be converted into a file:/// url. Non-ascii characters
	333	will be encoded using utf-8.
	334	If it does have a url specifier, it will be treated as a "hybrid"
	335	URL. Basically, a URL that should have URL special characters already
	336	escaped (like +?&# etc), but may have unicode characters, etc
	337	which would not be valid in a real URL.
	338
	339	:param url: Either a hybrid URL or a local path
	340	:return: A normalized URL which only includes 7-bit ASCII characters.
	341	"""
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	342	m = _url_scheme_re.match(url)
	343	if not m:
	344	return local_path_to_url(url)
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	345	scheme = m.group('scheme')
	346	path = m.group('path')
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	347	if not isinstance(url, unicode):
	348	for c in url:
	349	if c not in _url_safe_characters:
1685.1.53 by John Arbash Meinel Updated normalize_url	350	raise errors.InvalidURL(url, 'URLs can only contain specific'
1685.1.53 by John Arbash Meinel Updated normalize_url	351	' safe characters (not %r)' % c)
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	352	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
	353	return str(scheme + '://' + ''.join(path))
	354
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	355	# We have a unicode (hybrid) url
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	356	path_chars = list(path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	357
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	358	for i in xrange(len(path_chars)):
	359	if path_chars[i] not in _url_safe_characters:
	360	chars = path_chars[i].encode('utf-8')
	361	path_chars[i] = ''.join(
	362	['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
	363	path = ''.join(path_chars)
	364	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
	365	return str(scheme + '://' + path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	366
	367
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, return other. Else return a relative path.
    This assumes no symlinks as part of the url.

    :param base: The URL the result should be relative to.
    :param other: The URL to express relative to base.
    :return: other itself when either URL lacks a scheme or a path slash, or
        when the two differ in scheme/host (or, on win32 file:// URLs, in
        drive); otherwise a '/'-joined relative path, '.' if both are equal.
    """
    _unused, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    _unused, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # Everything before the first path slash covers both scheme and host, so
    # a single comparison takes care of differing schemes or hosts.
    base_prefix = base[:base_first_slash]
    other_prefix = other[:other_first_slash]
    if base_prefix != other_prefix:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # Locations on different logical drives cannot be made relative.
        base_drive = base[base_first_slash+1:base_first_slash+3]
        other_drive = other[other_first_slash+1:other_first_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_first_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_first_slash+1:]

    base_sections = base_path.split('/')
    if base_sections == ['']:
        base_sections = []
    other_sections = other_path.split('/')
    if other_sections == ['']:
        other_sections = []

    # Count how many leading sections the two paths have in common.
    shared = 0
    for base_part, other_part in zip(base_sections, other_sections):
        if base_part != other_part:
            break
        shared += 1

    # Climb out of whatever is left of base, then descend into other.
    relative = ['..'] * (len(base_sections) - shared)
    relative.extend(other_sections[shared:])

    return "/".join(relative) or "."
	418
	419
1711.2.43 by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms.	420	def _win32_extract_drive_letter(url_base, path):
	421	"""On win32 the drive letter needs to be added to the url base."""
	422	# Strip off the drive letter
	423	# path is currently /C:/foo
	424	if len(path) < 3 or path[2] not in ':\|' or path[3] != '/':
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	425	raise errors.InvalidURL(url_base + path,
1711.2.43 by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms.	426	'win32 file:/// paths need a drive letter')
	427	url_base += path[0:3] # file:// + /C:
	428	path = path[3:] # /foo
	429	return url_base, path
	430
	431
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string if
        we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        # We have either a relative path, or no separating slash
        if scheme_loc is not None:
            # Scheme with no path
            return url, ''
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path:
    # url_base is e.g. http://host or file://, path is e.g. /file/foo
    url_base, path = url[:first_path_slash], url[first_path_slash:]

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter across, turning file:// + /C:/foo
        # into file:///C: + /foo
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A path of just '/' is the root and keeps its slash.
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    parent, child = _posix_split(path)
    return url_base + parent, child
	470
1685.1.46 by John Arbash Meinel Sorting functions by name.	471
def _win32_strip_local_trailing_slash(url):
    """Strip a trailing slash, but not the one right after the drive letter.

    The last character is dropped only when url is longer than
    WIN32_MIN_ABS_FILEURL_LENGTH; the character itself is not inspected, so
    the caller is expected to pass a url that ends in '/'.
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
	478
	479
def strip_trailing_slash(url):
    """Strip trailing slash, except for root paths.

    The definition of 'root path' is platform-dependent.
    This assumes that all URLs are valid netloc urls, such that they
    form:
    scheme://host/path
    It searches for ://, and then refuses to remove the next '/'.
    It can also handle relative paths
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
        # This is unique on win32 platforms, and is the only URL
        # format which does it differently.
        file:///c|/       => file:///c:/
    """
    if not url.endswith('/'):
        # Nothing to do
        return url
    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    if scheme_loc is not None:
        last_index = len(url) - 1
        if first_path_slash is None or first_path_slash == last_index:
            # Don't chop off anything if the only slash is the path
            # separating slash
            return url
    # Either a relative path (no scheme) or a URL whose path goes below the
    # root: just chop off the last character.
    return url[:-1]
	515	# separating slash
1685.1.47 by John Arbash Meinel s comes before u	516	return url
1685.1.47 by John Arbash Meinel s comes before u	517
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	518	return url[:-1]
	519
1685.1.47 by John Arbash Meinel s comes before u	520
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	521	def unescape(url):
	522	"""Unescape relpath from url format.
	523
	524	This returns a Unicode path from a URL
	525	"""
	526	# jam 20060427 URLs are supposed to be ASCII only strings
	527	# If they are passed in as unicode, urllib.unquote
	528	# will return a UNICODE string, which actually contains
	529	# utf-8 bytes. So we have to ensure that they are
	530	# plain ASCII strings, or the final .decode will
	531	# try to encode the UNICODE => ASCII, and then decode
	532	# it into utf-8.
	533	try:
	534	url = str(url)
	535	except UnicodeError, e:
	536	raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
1685.1.80 by Wouter van Heyst more code cleanup	537
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	538	unquoted = urllib.unquote(url)
	539	try:
	540	unicode_path = unquoted.decode('utf-8')
	541	except UnicodeError, e:
	542	raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
	543	return unicode_path
	544
	545
	546	# These are characters that if escaped, should stay that way
	547	_no_decode_chars = ';/?:@&=+$,#'
	548	_no_decode_ords = [ord(c) for c in _no_decode_chars]
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	549	_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	550	+ ['%02X' % o for o in _no_decode_ords])
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	551	_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
	552	+ [('%02X' % o, chr(o)) for o in range(256)]))
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	553	#These entries get mapped to themselves
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	554	_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	555
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	556	# These characters shouldn't be percent-encoded, and it's always safe to
	557	# unencode them if they are.
	558	_url_dont_escape_characters = set(
	559	"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
	560	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
	561	"0123456789" # Numbers
	562	"-._~" # Unreserved characters
	563	)
	564
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	565	# These characters should not be escaped
2167.2.2 by Aaron Bentley Update safe character list	566	_url_safe_characters = set(
	567	"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
	568	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
	569	"0123456789" # Numbers
	570	"_.-!~*'()" # Unreserved characters
	571	"/;?:@&=+$," # Reserved characters
	572	"%#" # Extra reserved characters
	573	)
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	574
def unescape_for_display(url, encoding):
    """Produce a human-friendly rendering of a URL.

    file:// urls are converted to local paths; for other urls every path
    segment after the first '/' is unescaped where that is possible.

    A segment is left exactly as it was when, after unescaping, it is not
    valid utf-8 or cannot be represented in ``encoding``.  Escapes listed
    in _hex_display_map as mapping to themselves stay escaped.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding.
    :raises ValueError: if encoding is None.
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')

    if url.startswith('file://'):
        try:
            local_path = local_path_from_url(url)
            # Only a check that the path is representable; the encoded
            # bytes themselves are discarded.
            local_path.encode(encoding)
        except UnicodeError:
            return url
        else:
            return local_path

    # Work through the url one '/'-separated segment at a time; the text
    # before the first '/' is never touched.
    segments = url.split('/')
    for seg_index in xrange(1, len(segments)):
        pieces = segments[seg_index].split('%')
        # pieces[0] precedes any '%'; each later piece begins with what
        # should be a two-digit hex code.
        for piece_index in xrange(1, len(pieces)):
            piece = pieces[piece_index]
            hex_code = piece[:2]
            remainder = piece[2:]
            try:
                pieces[piece_index] = _hex_display_map[hex_code] + remainder
            except KeyError:
                # Not a known hex code: put back the percent symbol
                pieces[piece_index] = '%' + piece
            except UnicodeDecodeError:
                # Joining a high byte to unicode text failed; use the
                # matching unicode character instead.
                pieces[piece_index] = unichr(int(hex_code, 16)) + remainder
        unescaped = ''.join(pieces)

        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # The unescaped segment is not valid utf-8, leave it alone
            continue

        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # The segment cannot be shown in the target encoding, so the
            # escaped form is kept
            continue

        # Both checks passed: use the decoded text
        segments[seg_index] = decoded
    return u'/'.join(segments)
2512.4.1 by Ian Clatworthy Fixes #115491 - 'branch lp:projname' now creates ./projname as exected	631
	632
	633	def derive_to_location(from_location):
	634	"""Derive a TO_LOCATION given a FROM_LOCATION.
	635
	636	The normal case is a FROM_LOCATION of http://foo/bar => bar.
	637	The Right Thing for some logical destinations may differ though
	638	because no / may be present at all. In that case, the result is
	639	the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar.
	640	This latter case also applies when a Windows drive
	641	is used without a path, e.g. c:foo-bar => foo-bar.
	642	If no /, path separator or : is found, the from_location is returned.
	643	"""
	644	if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
	645	return os.path.basename(from_location.rstrip("/\\"))
	646	else:
	647	sep = from_location.find(":")
	648	if sep > 0:
	649	return from_location[sep+1:]
	650	else:
	651	return from_location
3242.3.26 by Aaron Bentley Implement rebase_url	652
3242.3.35 by Aaron Bentley Cleanups and documentation	653
def _is_absolute(url):
    """Return True if joining url onto '/foo' gives back url unchanged.

    Presumably osutils.pathjoin discards the base when its second argument
    is absolute, which makes this an absoluteness test -- confirm against
    osutils.pathjoin.
    """
    joined = osutils.pathjoin('/foo', url)
    return joined == url
	656
3242.3.35 by Aaron Bentley Cleanups and documentation	657
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as a path relative to new_base.

    The result will be a relative path.
    Absolute paths and full URLs are returned unaltered.

    :param url: The path to convert.
    :param old_base: The base URL that url is currently relative to.
    :param new_base: The base URL the result should be relative to.
    :return: url itself when it has a scheme or is absolute, otherwise the
        relative path from new_base's path to url joined onto old_base's
        path.
    :raises errors.InvalidRebaseURLs: if the first two urlparse components
        (scheme and network location) of old_base and new_base differ.
    """
    scheme, separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        # Nothing to rebase: already a full URL or an absolute path
        return url

    old_parts = urlparse.urlparse(old_base)
    new_parts = urlparse.urlparse(new_base)
    same_location = (old_parts[:2] == new_parts[:2])
    if not same_location:
        raise errors.InvalidRebaseURLs(old_base, new_base)

    # Resolve url against the old base path first (join() also deals with
    # any '..' segments in url), then describe that target as seen from
    # the new base path.
    target_path = join(old_parts[2], url)
    return determine_relative_path(new_parts[2], target_path)
3242.3.26 by Aaron Bentley Implement rebase_url	675
	676
	677	def determine_relative_path(from_path, to_path):
	678	"""Determine a relative path from from_path to to_path."""
	679	from_segments = osutils.splitpath(from_path)
	680	to_segments = osutils.splitpath(to_path)
	681	count = -1
	682	for count, (from_element, to_element) in enumerate(zip(from_segments,
	683	to_segments)):
	684	if from_element != to_element:
	685	break
	686	else:
	687	count += 1
	688	unique_from = from_segments[count:]
	689	unique_to = to_segments[count:]
	690	segments = (['..'] * len(unique_from) + unique_to)
	691	if len(segments) == 0:
	692	return '.'
	693	return osutils.pathjoin(*segments)
3873.3.1 by Martin Pool Move Transport._split_url to urlutils, and ad a simple test	694
	695
	696
	697	def parse_url(url):
	698	"""Extract the server address, the credentials and the path from the url.
	699
	700	user, password, host and path should be quoted if they contain reserved
	701	chars.
	702
	703	:param url: an quoted url
	704
	705	:return: (scheme, user, password, host, port, path) tuple, all fields
	706	are unquoted.
	707	"""
	708	if isinstance(url, unicode):
	709	raise errors.InvalidURL('should be ascii:\n%r' % url)
	710	url = url.encode('utf-8')
	711	(scheme, netloc, path, params,
	712	query, fragment) = urlparse.urlparse(url, allow_fragments=False)
	713	user = password = host = port = None
	714	if '@' in netloc:
	715	user, host = netloc.rsplit('@', 1)
	716	if ':' in user:
	717	user, password = user.split(':', 1)
	718	password = urllib.unquote(password)
	719	user = urllib.unquote(user)
	720	else:
	721	host = netloc
	722
4253.4.2 by Jelmer Vernooij Still parse port in case of ipv6.	723	if ':' in host and not (host[0] == '[' and host[-1] == ']'): #there is port
	724	host, port = host.rsplit(':',1)
	725	try:
	726	port = int(port)
	727	except ValueError:
	728	raise errors.InvalidURL('invalid port number %s in url:\n%s' %
	729	(port, url))
4253.4.3 by Jelmer Vernooij Support empty host name.	730	if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
4253.4.2 by Jelmer Vernooij Still parse port in case of ipv6.	731	host = host[1:-1]
3873.3.2 by Martin Pool Accept ipv6 literals in URLs	732
3873.3.1 by Martin Pool Move Transport._split_url to urlutils, and ad a simple test	733	host = urllib.unquote(host)
	734	path = urllib.unquote(path)
	735
	736	return (scheme, user, password, host, port, path)